简单爬虫查找

it2022-05-05  184

# ***** Copyright © SUNING *****
# -*- coding: utf-8 -*-
from Hephaestus.constant import CONFIG_DICT
from Hephaestus.process import main_proc
from Hephaestus.forge import forging

# Project RPA modules are wrapped by forging() so their calls are tracked
# by the Hephaestus process framework.
import sn_win as sn_win
sn_win = forging(sn_win)
import sncrawler as sncrawler
sncrawler = forging(sncrawler)
import sndict as sndict
sndict = forging(sndict)
import snweb as snweb
snweb = forging(snweb)


@main_proc("else")
def main():
    """Main flow: log in to the fund-management web system via a browser,
    reuse the browser session cookies with `requests` to query account
    details, then scrape one cell (an amount) from the result table.

    Side effects only: opens a Chrome browser, performs HTTP POSTs, and
    prints the scraped value. Returns nothing.
    """
    # --- Browser login ------------------------------------------------
    # Open the browser.
    driver = snweb.open_browser(browser_name="chrome", retry=True)
    # Open the fund-management system (URL masked in the published source).
    snweb.open_url(url='***')
    # Click the username field to focus it.
    snweb.click_element(locator="xpath=//INPUT[@id='username']",
                        timeout=10, intr=True, index=0)
    # Credentials come from the framework config:
    # fundAccount[0] = username, fundAccount[1] = password.
    fundAccount = sndict.get_value(dic=CONFIG_DICT, key='fundAccount')
    # Enter the username.
    snweb.input_text(locator="xpath=//INPUT[@id='username']",
                     text=fundAccount[0], timeout=10, intr=True, index=0)
    # Clear the password field before typing.
    snweb.clear_element(locator="xpath=//INPUT[@id='password']",
                        timeout=10, intr=True, index=0)
    # Enter the password.
    snweb.input_text(locator="xpath=//INPUT[@id='password']",
                     text=fundAccount[1], timeout=10, intr=True, index=0)
    # Click the login button.
    snweb.click_element(locator="xpath=//a[@id='loginbtn']",
                        timeout=10, intr=True, index=0)

    # --- Hand the authenticated session over to `requests` ------------
    cookieStr = driver.get_cookies()
    cookies_dict = {cookiedic["name"]: cookiedic["value"]
                    for cookiedic in cookieStr}
    # Function-local imports kept, as in the original script.
    from requests.utils import cookiejar_from_dict
    from requests import Session
    cookies = cookiejar_from_dict(cookies_dict)
    session = Session()
    session.cookies = cookies

    # --- First query: discover paging totals ---------------------------
    data = sndict.create_dict()
    # Bank account number to query.
    sndict.set_value(dic=data, key='bankAcc', value='32001881700052502564')
    sndict.set_value(dic=data, key='isOnline', value='1')
    # Detail type.
    sndict.set_value(dic=data, key='balanceType', value=0)
    # Company code.
    sndict.set_value(dic=data, key='corpCode', value='G002')
    # Query date range.
    sndict.set_value(dic=data, key='dateStart', value='2019-05-01')
    sndict.set_value(dic=data, key='dateEnd', value='2019-05-31')
    # Sort order.
    sndict.set_value(dic=data, key='orderByField', value='B.CORP_CODE,A.ROWID')
    text = session.post(
        url='http://funddc.cnsuning.com/snweb_datacenter/queryAccountDetails.do',
        params=None, data=data, json=None, files=None, headers=None,
        timeout=None, stream=None, allow_redirects=True, verify=True)
    # Extract the response body text.
    text = sncrawler.get_req_text(r=text)
    # Regex-scrape the pagination totals out of the HTML
    # ("共N页" = N pages total, "共N条记录" = N records total).
    totalpage = sn_win.findallstr(sText=text, pattern=r"共(\d+)页")
    totalnumber = sn_win.findallstr(sText=text, pattern=r'共(\d+)条记录')

    # --- Second query: fetch everything on one page --------------------
    sndict.set_value(dic=data, key='pageControlData.changePageNumber',
                     value=totalpage)
    sndict.set_value(dic=data, key='pageControlData.pageSize', value='50')
    sndict.set_value(dic=data, key='formAction', value='list')
    sndict.set_value(dic=data, key='pageControlData.currentPage', value='-1')
    sndict.set_value(dic=data, key='pageControlData.resultCount',
                     value=totalnumber)
    headDict = sndict.create_dict()
    sndict.set_value(dic=headDict, key='Referer',
                     value='http://funddc.cnsuning.com/snweb_datacenter/queryAccountDetails.do')
    sndict.set_value(dic=headDict, key='Host', value='funddc.cnsuning.com')
    sndict.set_value(dic=headDict, key='Content-Type',
                     value='application/x-www-form-urlencoded')
    from bs4 import BeautifulSoup
    text = session.post(
        url='***', params=None, data=data, json=None, files=None,
        headers=headDict, timeout=None, stream=None,
        allow_redirects=True, verify=True)
    text = sncrawler.get_req_text(r=text)

    # --- Scrape the target cell ----------------------------------------
    soup = BeautifulSoup(text, 'html.parser')
    # Third-from-last row, 25th column of the result table — presumably a
    # summary/total amount; verify against the live page layout.
    money = (soup.find('table', attrs={'class': 'list', 'align': 'center'})
                 .find_all('tr')[-3]
                 .find_all('td')[24]
                 .text.strip())
    print(money)


if __name__ == '__main__':
    main()

转载于:https://www.cnblogs.com/jessitommy/p/11075974.html


最新回复(0)