爬虫学习(四)——post请求爬取

it2022-05-05  132

百度翻译爬取数据

# POST to the Baidu Translate "sug" (suggestion) endpoint and print the raw JSON reply.
import urllib.request
import urllib.parse

post_url = "https://fanyi.baidu.com/sug"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}

word = input("请输入要翻译的内容:")
form = {"kw": word}

# Step 1 of the demo: urlencode() alone yields a str such as 'kw=...'.
encoded_str = urllib.parse.urlencode(form)
print(encoded_str)

# Step 2: the POST body passed to urlopen() must be bytes, so encode it.
body = urllib.parse.urlencode(form).encode("utf8")
print(body)

request = urllib.request.Request(post_url, headers=headers)
response = urllib.request.urlopen(request, data=body)
print(response.read().decode("utf8"))

百度翻译 v2transapi 接口爬取数据(需携带完整请求头)

# Two further POST examples.
#
# Example 1: the Baidu Translate v2transapi endpoint. When scraping a target
# site, first locate its API endpoint and the form data it expects. The key
# point of this POST request is that the header set must be complete — the
# critical fields (Cookie, Referer, User-Agent, ...) cannot be omitted.
import urllib.request
import urllib.parse

apiurl = "https://fanyi.baidu.com/v2transapi"

# Form fields captured from the browser; 'sign' and 'token' are
# session-specific values the page computes for each query.
payload = {
    'from': 'en',
    'to': 'zh',
    'query': 'baby',
    'transtype': 'realtime',
    'simple_means_flag': '3',
    'sign': '814534.560887',
    'token': '8b44713bb18ae29ba380245d18270565',
}
payload_bytes = urllib.parse.urlencode(payload).encode("utf8")

# Some captured headers (Accept, Content-Length, Content-Type, ...) turned out
# to be unnecessary and were left out; the ones below are the required set.
headers = {
    'Cookie': 'PSTM=1528269920; BIDUPSID=7EE884F5F31114F0BCDC2588805B747F; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; to_lang_often=[{"value":"en","text":"%u82F1%u8BED"},{"value":"zh","text":"%u4E2D%u6587"}]; from_lang_often=[{"value":"zh","text":"%u4E2D%u6587"},{"value":"en","text":"%u82F1%u8BED"}]; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BAIDUID=C15EE352EEB61222BDA4C2F95822E5EF:SL=0:NR=10:FG=1; pgv_pvi=4516305920; delPer=0; H_PS_PSSID=1436_21101_28206_28131_26350_28139; PSINO=2; locale=zh; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1546425466,1546425533,1546425602,1546484054; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1546487219',
    'Host': 'fanyi.baidu.com',
    'Origin': 'https://fanyi.baidu.com',
    'Referer': 'https://fanyi.baidu.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

request = urllib.request.Request(url=apiurl, headers=headers)
response = urllib.request.urlopen(request, data=payload_bytes)
print(response.read().decode("utf8"))

# Example 2: AJAX POST request scraping KFC restaurant locations.
kfc_url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"
kfc_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}

# City name typed by the user becomes the 'keyword' form field.
keyword = input("请输入需要查询的城市")
kfc_form = {
    "cname": "",
    "pid": "",
    "keyword": keyword,
    "pageIndex": "1",
    "pageSize": "10",
}
# Encode the parameters into the bytes format the POST body requires.
kfc_body = urllib.parse.urlencode(kfc_form).encode("utf8")

# Build the request with headers, then send it; the essential part is the
# POST request carrying the form data via urlopen's `data` argument.
kfc_request = urllib.request.Request(kfc_url, headers=kfc_headers)
kfc_response = urllib.request.urlopen(kfc_request, data=kfc_body)
print(kfc_response.read().decode("utf8"))

 

转载于:https://www.cnblogs.com/kuangkuangduangduang/p/10366222.html


最新回复(0)