# python3
# jiandan meizi tu (image downloader for jandan.net/ooxx)
import os
import random
import time
import urllib
import urllib.request as req
8
9
def url_open(url):
    """Fetch ``url`` directly (no proxy) and return the response body.

    A User-Agent header is picked at random from a small fixed pool on
    every call, so successive requests do not all carry the same value.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
    request = urllib.request.Request(
        url, headers={'User-Agent': random.choice(user_agents)})
    # The context manager closes the response even if read() fails, so the
    # underlying socket is not leaked.
    with urllib.request.urlopen(request) as response:
        return response.read()
21
def url_open2(url):
    """Fetch ``url`` through an HTTP proxy and return the response body.

    The proxy address is picked at random from a hard-coded list (currently
    a single entry) and printed to stdout. As in ``url_open``, the
    User-Agent header is picked at random from a small fixed pool.

    The opener built here is used for this request only; it is not
    installed as the process-wide default, so other ``urlopen`` callers are
    unaffected.

    Args:
        url: Address to fetch. Only ``http`` URLs are routed via the proxy,
            because the proxy mapping is registered for that scheme alone.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from the opener when the request
            cannot be completed (for example, the proxy is unreachable).
    """
    user_agents = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')
    ip_list = ['117.135.251.136:82']

    ip = random.choice(ip_list)
    print(ip)

    request = req.Request(
        url, headers={'User-Agent': random.choice(user_agents)})
    opener = req.build_opener(req.ProxyHandler({'http': ip}), req.HTTPHandler)
    # Use the opener directly instead of install_opener(): no global state
    # is mutated, and the connection is closed when the block exits.
    with opener.open(request) as conn:
        return conn.read()
40
def get_current_page(url):
    """Return the current comment-page number found on ``url``, as a string.

    The page is downloaded with ``url_open2``, decoded as UTF-8 and scanned
    for the text ``current-comment-page``. The number is the text that
    starts 23 characters after the beginning of that marker and runs up to
    the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The text between the marker offset and the following ``]``.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present.
            Previously this case returned an unrelated slice of the page.
    """
    html = url_open2(url).decode('utf-8')

    marker_pos = html.find('current-comment-page')
    if marker_pos == -1:
        raise ValueError("'current-comment-page' marker not found in " + url)

    # The marker is 20 characters long; the extra 3 skip whatever sits
    # between it and the number (presumably `">[` -- TODO confirm against
    # the live markup).
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError("no closing ']' after page marker in " + url)
    return html[start:end]
46
def find_imgs(url):
    """Collect the ``.jpg`` image addresses referenced on the page at ``url``.

    The page is downloaded with ``url_open2``, decoded as UTF-8 and scanned
    for every occurrence of ``img src="http``. For each occurrence, the
    address from ``http`` up to and including the first ``.jpg`` is kept,
    provided that ``.jpg`` lies within 255 characters of the tag start and
    before the closing quote of the ``src`` attribute.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of address strings in page order; empty if none are found.
    """
    html = url_open2(url).decode('utf-8')

    prefix = 'img src="'
    needle = prefix + 'http'
    img_addrs = []

    tag_pos = html.find(needle)
    while tag_pos != -1:
        addr_start = tag_pos + len(prefix)

        # Never look past the attribute's closing quote: otherwise a
        # non-jpg image followed by a jpg produces one bogus address that
        # spans both tags, and the real jpg is skipped.
        closing_quote = html.find('"', addr_start)
        if closing_quote == -1:
            closing_quote = len(html)
        search_end = min(closing_quote, tag_pos + 255)

        ext_pos = html.find('.jpg', addr_start, search_end)
        if ext_pos != -1:
            img_addrs.append(html[addr_start:ext_pos + len('.jpg')])

        tag_pos = html.find(needle, addr_start)
    return img_addrs
59
def save_imgs(folder, img_addrs):
    """Download every address in ``img_addrs`` and write it to disk.

    Each file is named after the last ``/``-separated component of its
    address and is written to the current working directory.

    Args:
        folder: Not used by this function; files go to the current working
            directory (``download_mm`` changes into the target folder
            before calling).
        img_addrs: Iterable of image address strings.
    """
    for address in img_addrs:
        filename = address.split('/')[-1]
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        data = url_open2(address)
        with open(filename, 'wb') as f:
            f.write(data)
66
67
def download_mm(folder='xx', pages=300):
    """Download images from up to ``pages`` comment pages of jandan.net/ooxx.

    Starts at the page number reported by ``get_current_page`` and walks
    backwards one page at a time, saving every image returned by
    ``find_imgs``. Progress is printed with a timestamp, and the loop
    sleeps for two seconds before every third page.

    Side effects: creates ``folder`` if needed and changes the process
    working directory into it.

    Args:
        folder: Directory that receives the downloaded files.
        pages: Maximum number of pages to process.
    """
    # Create the target directory when missing; chdir would fail otherwise.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    current_page_num = int(get_current_page(url))
    for i in range(pages):
        # Stop once the first page is done instead of requesting page-0
        # and negative page numbers.
        if current_page_num < 1:
            break
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
              'current_page_num', current_page_num)
        if i % 3 == 0:
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                  "sleep 2 seconds...")
            time.sleep(2)
        page_url = url + 'page-' + str(current_page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
        # Decrement after the download so the logged number is the page
        # actually fetched and the newest page is not skipped.
        current_page_num -= 1
83
# Run the downloader with its default folder and page count when this file
# is executed as a script.
if __name__ == '__main__':
    download_mm()
# Reposted from: https://www.cnblogs.com/duanguyuan/p/5208586.html
# Related resources: GPU compute-power comparison table!