#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/7/11 18:57
# @Author : 李振华
# Multi-threaded scraper: fetch all images from a single web page
import os
import queue
import threading
from random import Random

import lxml  # imported only to fail fast if the BeautifulSoup 'lxml' backend is missing
import requests
from bs4 import BeautifulSoup
# Parse the page and push every image URL onto the queue
def parse(url):
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml')
    for img in soup.find_all('img'):
        src = img.get('src')
        if src:  # skip <img> tags that have no src attribute
            q.put(src)
    print(q.qsize())
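# --- Optional note (not part of the original script) ------------------------
# src attributes are often relative paths (e.g. "/img/a.jpg"), which
# requests.get() cannot fetch by itself. A minimal sketch of normalizing them
# with urllib.parse.urljoin before queueing; the helper name is illustrative:
def absolute_src(page_url, src):
    from urllib.parse import urljoin
    return urljoin(page_url, src)  # leaves already-absolute URLs unchanged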
# Generate a random string, used as the downloaded file's name
def random_str(randomlength=8):
    strs = ''
    chars = 'AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789'
    length = len(chars) - 1
    random = Random()
    for i in range(randomlength):
        strs += chars[random.randint(0, length)]
    return strs
# Download one image into the local image/ directory
def download_image(image_url):
    # exist_ok avoids a race when several threads create the directory at once
    os.makedirs('image', exist_ok=True)
    image_name = random_str()
    image = requests.get(image_url, stream=True)  # stream so large images are written in chunks
    with open('image/%s.jpg' % image_name, 'wb') as img:
        for b in image.iter_content(2048):
            img.write(b)
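# --- Optional alternative (not part of the original script) -----------------
# A minimal sketch of the same download fan-out with a bounded pool
# (concurrent.futures.ThreadPoolExecutor) instead of the one-thread-per-image
# approach used in the main block below, so an image-heavy page does not spawn
# hundreds of threads. The function name and max_workers value are illustrative.
def download_all(image_queue, max_workers=8):
    from concurrent.futures import ThreadPoolExecutor
    urls = [image_queue.get() for _ in range(image_queue.qsize())]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # collecting the results blocks until every download has finished
        list(pool.map(download_image, urls))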
# Main: parse the page, then download each image in its own thread
if __name__ == '__main__':
    q = queue.Queue()
    url = 'http://enrz.com/fhm/2017/05/11/90122.html'

    parse_thread = threading.Thread(target=parse, args=(url,))
    parse_thread.start()
    parse_thread.join()

    # One download thread per queued image URL
    download_thread_pool = []
    for i in range(q.qsize()):
        download_thread = threading.Thread(target=download_image, args=(q.get(),))
        download_thread.start()
        download_thread_pool.append(download_thread)
    for thread in download_thread_pool:
        thread.join()
# Reposted from: https://www.cnblogs.com/liyu2151/p/7152094.html