# 如果你对Python感兴趣,我这有个学习Python基地,里面有很多学习资料,感兴趣的+Q群:688244617
import re

import requests
from lxml.html import etree

# Script: fetch one article-listing page from www.liyang.gov.cn, follow every
# article link found on it, locate a download link on each article page and
# save the downloaded bytes as "<article title>.pdf" in the working directory.

# Listing page whose article links are scraped below.
url = 'http://www.liyang.gov.cn/default.php?mod=article&fid=163250&s99679207_start=0'
rp = requests.get(url)
re_html = etree.HTML(rp.text)

# Both expressions walk to the same <a> elements: the first yields each
# link's href, the second yields each link's text (used as the file name).
url_xpath = '//*[@id="s99679207_content"]/table/tbody/tr/td/span[1]/span/a/@href'
title_xpath = '//*[@id="s99679207_content"]/table/tbody/tr/td/span[1]/span/a/text()'
url_list = re_html.xpath(url_xpath)
title_list = re_html.xpath(title_xpath)

# Select the href attribute itself. Selecting the bare <a> element would
# interpolate an element repr, not a path, into the download URL below.
data_url_xpth = '//tbody/tr[1]/td[2]/a/@href'

# zip pairs the n-th link with the n-th title in document order.
for url_end, title in zip(url_list, title_list):
    new_url = f'http://www.liyang.gov.cn/{url_end}'
    print(new_url)
    rp_1 = requests.get(new_url)
    print(rp_1.text)

    try:
        re_1_html = etree.HTML(rp_1.text)
        data_url = re_1_html.xpath(data_url_xpth)[0]
    except (IndexError, AttributeError, ValueError, etree.LxmlError):
        # Parsing or the XPath lookup failed (e.g. no matching element gives
        # IndexError): fall back to scanning the raw HTML with a regex.
        data_list = re.findall('<a href="(.*?)" target="_blank">', rp_1.text)
        if not data_list:
            # Nothing to download on this page; report it and keep going
            # rather than aborting the remaining articles.
            print(f'no download link found on {new_url}, skipping')
            continue
        data_url = data_list[0]

    print(data_url)
    data_url = f'http://www.liyang.gov.cn/{data_url}'

    # Do not name this variable `re`: that would shadow the regex module and
    # break the fallback above on every later iteration.
    pdf_response = requests.get(data_url)
    with open(f'{title}.pdf', 'wb') as fw:
        fw.write(pdf_response.content)
# 转载请注明原文地址: https://win8.8miu.com/read-1447674.html