|
|
@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
import csv
import sys

import requests
from lxml import etree
from xpinyin import Pinyin
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# List of cities to scrape; add or remove entries as needed.
cities = [
    '北京', '上海', '广州', '深圳', '成都', '杭州', '南京', '武汉', '西安', '长沙', '南昌',
    '赣州', '昆明', '大理', '乌鲁木齐', '拉萨', '九江', '上饶', '吉安', '景德镇', '齐齐哈尔',
    '沈阳',
]

# Turns each Chinese city name into the pinyin string used in the page URL.
p = Pinyin()

# Request headers are identical for every city, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
}

# Seconds to wait on the server before giving up on a single city.
REQUEST_TIMEOUT = 10


def _first_text(tree, query):
    """Return the ``.text`` of the first node matching the XPath *query*.

    Raises:
        LookupError: if nothing matches, naming the selector so a change in
            the page layout is easy to diagnose.
    """
    nodes = tree.xpath(query)
    if not nodes:
        raise LookupError(f'no element matches {query!r}')
    return nodes[0].text


def _split_forecast(summary):
    """Split the forecast summary into (weather type, all-day temperature).

    The text after the first ':' is split on ','; the first piece goes to
    the weather-type column and the second to the all-day-temperature column.

    Raises:
        LookupError: if the summary is empty or lacks either separator.
    """
    if not summary:
        raise LookupError('forecast summary is empty')
    # A missing ':' or ',' raises IndexError, which is a LookupError.
    fields = summary.split(':')[1].split(',')
    return fields[0], fields[1]


def _scrape_city(url):
    """Download one city page and return its CSV row.

    Returns:
        A list in the same order as the CSV header: city, date, outdoor
        temperature, feels-like temperature, weather type, all-day
        temperature, health effect, suggested measures.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
        LookupError: if an expected element is missing from the page.
    """
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # ISO-8859-1 is only requests' fallback when the server declares no
    # charset; it cannot be right for a Chinese page, so sniff the body.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding

    tree = etree.HTML(response.text)
    # Guard against a parse that produced no root element.
    if tree is None:
        raise LookupError('page could not be parsed as HTML')

    city_title = _first_text(tree, '//h3[@class="city-title ico"]')
    date = _first_text(tree, '//h3[@class="city-title ico"]//span')
    ot = _first_text(tree, '//div[@class="ltlTemperature"]//b')  # outdoor temperature
    st = _first_text(tree, '//div[@class="ltlTemperature"]//span')  # feels-like temperature
    # Weather type and all-day temperature come from the same link text, so
    # look it up once and split it.
    summary = _first_text(tree, '(//div[@class="box pcity"])[3]//li//a[@target="_blank"]')
    t_type, all_day_t = _split_forecast(summary)
    health_effect = _first_text(tree, '(//div[@class="air-quality pd0"])[1]//font')  # health effect
    suggest_measures = _first_text(tree, '(//div[@class="air-quality pd0"])[2]//font')  # suggested measures

    return [city_title, date, ot, st, t_type, all_day_t, health_effect, suggest_measures]


# Open the CSV file: created if missing, overwritten if it already exists.
with open('天气预报.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    # Write the CSV header row.
    writer.writerow(['城市', '日期', '室外温度', '体感温度', '天气情况', '全天气温', '健康影响', '建议措施'])

    for city_name in cities:
        # Convert the city name to pinyin and drop the '-' separators.
        city = p.get_pinyin(city_name).replace('-', '')

        # Build the page URL for this city.
        url = f'https://www.tianqishi.com/{city}.html'

        try:
            row = _scrape_city(url)
        except (requests.RequestException, LookupError) as exc:
            # One unreachable or re-laid-out page should not discard the
            # remaining cities; report it and move on.
            print(f'skipping {city_name} ({url}): {exc}', file=sys.stderr)
            continue

        # Write this city's data row.
        writer.writerow(row)
|