import requests
import time
import re
import math
import sys
# Browser-like User-Agent sent with every request.
_REQ_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
}
# Seconds to wait for any single HTTP response before giving up.
_REQUEST_TIMEOUT = 10
# NOTE(review): the listing and article URLs are hard-coded to one blog and
# are not derived from the ``url`` argument of auto_click.
_BASE_PAGE_LINK = 'https://blog.csdn.net/weixin_43215250/article/list/'
_ARTICLE_PREFIX = 'https://blog.csdn.net/weixin_43215250/article/details/'

# Markers embedded in the profile page's inline script.
_PAGE_SIZE_RE = re.compile(r'pageSize = (\d\d) ;')
_LIST_TOTAL_RE = re.compile(r'var listTotal = (\d+) ;')
# ``[^"]*`` (instead of a greedy ``.*``) stops at the closing quote, so two
# anchors on the same line are matched separately.
_ARTICLE_LINK_RE = re.compile(
    r'<a href="' + re.escape(_ARTICLE_PREFIX) + r'([^"]*)" target="_blank">'
)


def _count_list_pages(html):
    """Return the number of listing pages advertised in *html* (0 if unknown)."""
    size_match = _PAGE_SIZE_RE.search(html)
    total_match = _LIST_TOTAL_RE.search(html)
    if size_match is None or total_match is None:
        # Markers missing: treat like a failed fetch rather than crashing.
        return 0
    page_size = int(size_match.group(1))
    if page_size == 0:
        return 0
    list_total = int(total_match.group(1))
    # Integer ceiling division.
    return (list_total + page_size - 1) // page_size


def _extract_article_ids(html):
    """Return the distinct article link suffixes found in *html*, in page order."""
    return list(dict.fromkeys(_ARTICLE_LINK_RE.findall(html)))


def auto_click(url, num):
    """Request every article linked from the blog's listing pages, *num* times.

    Each pass fetches ``url``, reads the page size and total article count
    from it, walks the listing pages, and issues one GET per distinct article
    link found. A progress line is printed per article request; the printed
    counter restarts at 1 on every listing page. The function sleeps two
    seconds after each pass.

    NOTE(review): the original indentation was lost; this layout assumes the
    counter decrement and the sleep run once per pass of the outer loop.

    Args:
        url: Address of the blog profile page used to discover the page count.
        num: Number of passes to perform. Zero, negative or ``None`` means
            no passes are made.

    Returns:
        None.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            network errors or timeouts.
    """
    # ``num > 0`` guards against a negative count looping forever.
    while num and num > 0:
        resp = requests.get(url, headers=_REQ_HEADERS, timeout=_REQUEST_TIMEOUT)
        if resp.status_code == requests.codes.ok:
            last_page_num = _count_list_pages(resp.text)
            for page_no in range(1, last_page_num + 1):
                page_resp = requests.get(
                    _BASE_PAGE_LINK + str(page_no),
                    headers=_REQ_HEADERS,
                    timeout=_REQUEST_TIMEOUT,
                )
                article_ids = _extract_article_ids(page_resp.text)
                for click_no, article_id in enumerate(article_ids, 1):
                    requests.get(
                        _ARTICLE_PREFIX + article_id,
                        headers=_REQ_HEADERS,
                        timeout=_REQUEST_TIMEOUT,
                    )
                    print('正在第%d次点击' % click_no)
        num -= 1
        time.sleep(2)
if __name__ == '__main__':
    # Script entry point: run 500 passes against the blog profile page below.
    num = 500
    url = 'https://blog.csdn.net/weixin_43215250'
    auto_click(url, num)
# When reposting, please credit the original source: https://win8.8miu.com/read-1588.html