# Crawler example: scrape the Yuyao-area forecast from weather.com.cn (天气网)
2 import requests
3 from lxml
import etree
4
5
class WeatherSpider:
    """Scrape the next-day forecast from a weather.com.cn 7-day page.

    The default URL is the page this example was written for (the Yuyao
    area); another page with the same layout can be passed as ``url``.
    """

    DEFAULT_URL = "http://www.weather.com.cn/weather/101210404.shtml"

    # Second <li> of the 7-day list, i.e. the entry after today's.
    TOMORROW_XPATH = (
        "//div[contains(@class,'con') and contains(@class,'today')]"
        "//div[@class='c7d']/ul/li[2]"
    )

    # (result key, XPath relative to the day's <li>) in output order.
    FIELD_XPATHS = (
        ("date", "./h1/text()"),
        ("weather", "./p[@class='wea']/@title"),
        ("temperature_max", "./p[@class='tem']/span/text()"),
        ("temperature_min", "./p[@class='tem']/i/text()"),
        ("air_speed", "./p[@class='win']/i/text()"),
    )

    def __init__(self, url=DEFAULT_URL, timeout=10):
        """Store the request settings.

        Args:
            url: Forecast page to download.
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = url
        self.timeout = timeout
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/72.0.3626.119 Safari/537.36"
            )
        }

    @staticmethod
    def _first(nodes, default=None):
        """Return the first item of an XPath result list, or *default*."""
        return nodes[0] if nodes else default

    def get_url_content(self):
        """Download ``self.url`` and return the body decoded as UTF-8.

        Raises:
            requests.RequestException: on timeout, connection failure or
                a 4xx/5xx response.
        """
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(
            self.url, headers=self.headers, timeout=self.timeout
        )
        # Fail here rather than feeding an error page to the parser.
        response.raise_for_status()
        return response.content.decode("utf-8")

    def get_weather_data(self, html):
        """Extract the next-day forecast fields from the page markup.

        Args:
            html: Markup of the forecast page.

        Returns:
            A dict with the keys ``date``, ``weather``,
            ``temperature_max``, ``temperature_min`` and ``air_speed``.
            A field whose node is absent from the page maps to ``None``.

        Raises:
            IndexError: if the next-day entry itself cannot be located.
        """
        tree = etree.HTML(html)
        # NOTE(review): etree.HTML can reportedly yield None for input with
        # no parseable markup; treat that as "entry not found" - confirm.
        day_nodes = tree.xpath(self.TOMORROW_XPATH) if tree is not None else []
        if not day_nodes:
            raise IndexError(
                "next-day forecast entry not found; "
                "the page layout may have changed"
            )
        tomorrow = day_nodes[0]
        return {
            key: self._first(tomorrow.xpath(path))
            for key, path in self.FIELD_XPATHS
        }

    def run(self):
        """Download the page, extract the forecast and print it."""
        # Fetch the page content for the configured URL.
        content_html = self.get_url_content()
        # Pull the weather fields out of the downloaded markup.
        data = self.get_weather_data(content_html)
        # Print the scraped weather data.
        print(data)
34
35
if __name__ == "__main__":
    # Script entry point: build a spider and perform one scrape.
    WeatherSpider().run()
# 爬取结果
# 本人刚学爬虫不久,这是一个小 demo:利用爬虫爬取天气网,每次运行可以抓取第二天的天气状况。
# 转载于:https://www.cnblogs.com/zst-blogs/p/10752565.html