#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/7/11 18:57
# @Author : 李振华
# Multi-threaded scraper: fetch all images from a single web page
import os
import queue
import threading
from random import Random

import lxml  # imported only to fail fast if the BeautifulSoup 'lxml' backend is missing
import requests
from bs4 import BeautifulSoup
# Parse the page and push every image URL onto the queue
def parse(url):
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml')
    for img in soup.find_all('img'):
        src = img.get('src')
        if src:  # skip <img> tags that have no src attribute
            q.put(src)
    print(q.qsize())
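# --- Optional note (not part of the original script) ------------------------
# src attributes are often relative paths (e.g. "/img/a.jpg"), which
# requests.get() cannot fetch by itself. A minimal sketch of normalizing them
# with urllib.parse.urljoin before queueing; the helper name is illustrative:
def absolute_src(page_url, src):
    from urllib.parse import urljoin
    return urljoin(page_url, src)  # leaves already-absolute URLs unchanged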
# Generate a random string, used as the downloaded file's name
def random_str(randomlength=8):
    strs = ''
    chars = 'AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789'
    length = len(chars) - 1
    random = Random()
    for i in range(randomlength):
        strs += chars[random.randint(0, length)]
    return strs
# Download one image into the local image/ directory
def download_image(image_url):
    # exist_ok avoids a race when several threads create the directory at once
    os.makedirs('image', exist_ok=True)
    image_name = random_str()
    image = requests.get(image_url, stream=True)  # stream so large images are written in chunks
    with open('image/%s.jpg' % image_name, 'wb') as img:
        for b in image.iter_content(2048):
            img.write(b)
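# --- Optional alternative (not part of the original script) -----------------
# A minimal sketch of the same download fan-out with a bounded pool
# (concurrent.futures.ThreadPoolExecutor) instead of the one-thread-per-image
# approach used in the main block below, so an image-heavy page does not spawn
# hundreds of threads. The function name and max_workers value are illustrative.
def download_all(image_queue, max_workers=8):
    from concurrent.futures import ThreadPoolExecutor
    urls = [image_queue.get() for _ in range(image_queue.qsize())]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # collecting the results blocks until every download has finished
        list(pool.map(download_image, urls))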
# Main: parse the page, then download each image in its own thread
if __name__ == '__main__':
    q = queue.Queue()
    url = 'http://enrz.com/fhm/2017/05/11/90122.html'

    parse_thread = threading.Thread(target=parse, args=(url,))
    parse_thread.start()
    parse_thread.join()

    # One download thread per queued image URL
    download_thread_pool = []
    for i in range(q.qsize()):
        download_thread = threading.Thread(target=download_image, args=(q.get(),))
        download_thread.start()
        download_thread_pool.append(download_thread)
    for thread in download_thread_pool:
        thread.join()
# Reposted from: https://www.cnblogs.com/liyu2151/p/7152094.html