ADD file via upload

master
pl9p5shn8 4 years ago
parent 5880623a1f
commit c52b4d31b5

@ -0,0 +1,242 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b3ba4d4c",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"import json\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import math\n",
"\n",
"\n",
"def getHTMLtext(url):\n",
" \"\"\"请求获得网页内容\"\"\"\n",
" try:\n",
" r = requests.get(url, timeout=30)\n",
" r.raise_for_status()\n",
" r.encoding = r.apparent_encoding\n",
" print(\"成功访问\")\n",
" return r.text\n",
" except:\n",
" print(\"访问错误\")\n",
" return \" \"\n",
"\n",
"\n",
"def get_content(html):\n",
" \"\"\"处理得到有用信息保存数据文件\"\"\"\n",
" final = [] # 初始化一个列表保存数据\n",
" bs = BeautifulSoup(html, \"html.parser\") # 创建BeautifulSoup对象\n",
" body = bs.body\n",
" data = body.find('div', {'id': '7d'}) # 找到div标签且id = 7d\n",
" # 下面爬取当天的数据\n",
" data2 = body.find_all('div', {'class': 'left-div'})\n",
" text = data2[2].find('script').string\n",
" text = text[text.index('=') + 1:-2] # 移除改var data=将其变为json数据\n",
" jd = json.loads(text)\n",
" dayone = jd['od']['od2'] # 找到当天的数据\n",
" final_day = [] # 存放当天的数据\n",
" count = 0\n",
" for i in dayone:\n",
" temp = []\n",
" if count <= 23:\n",
" temp.append(i['od21']) # 添加时间\n",
" temp.append(i['od22']) # 添加当前时刻温度\n",
" temp.append(i['od24']) # 添加当前时刻风力方向\n",
" temp.append(i['od25']) # 添加当前时刻风级\n",
" temp.append(i['od26']) # 添加当前时刻降水量\n",
" temp.append(i['od27']) # 添加当前时刻相对湿度\n",
" temp.append(i['od28']) # 添加当前时刻控制质量\n",
" # print(temp)\n",
" final_day.append(temp)\n",
" count = count + 1\n",
" # 下面爬取7天的数据\n",
" ul = data.find('ul') # 找到所有的ul标签\n",
" li = ul.find_all('li') # 找到左右的li标签\n",
" i = 0 # 控制爬取的天数\n",
" for day in li: # 遍历找到的每一个li\n",
" if i < 7 and i > 0:\n",
" temp = [] # 临时存放每天的数据\n",
" date = day.find('h1').string # 得到日期\n",
" date = date[0:date.index('日')] # 取出日期号\n",
" temp.append(date)\n",
" inf = day.find_all('p') # 找出li下面的p标签,提取第一个p标签的值即天气\n",
" temp.append(inf[0].string)\n",
"\n",
" tem_low = inf[1].find('i').string # 找到最低气温\n",
"\n",
" if inf[1].find('span') is None: # 天气预报可能没有最高气温\n",
" tem_high = None\n",
" else:\n",
" tem_high = inf[1].find('span').string # 找到最高气温\n",
" temp.append(tem_low[:-1])\n",
" if tem_high[-1] == '℃':\n",
" temp.append(tem_high[:-1])\n",
" else:\n",
" temp.append(tem_high)\n",
"\n",
" wind = inf[2].find_all('span') # 找到风向\n",
" for j in wind:\n",
" temp.append(j['title'])\n",
"\n",
" wind_scale = inf[2].find('i').string # 找到风级\n",
" index1 = wind_scale.index('级')\n",
" temp.append(int(wind_scale[index1 - 1:index1]))\n",
" final.append(temp)\n",
" i = i + 1\n",
" return final_day, final\n",
"\n",
"\n",
"# print(final)\n",
"def get_content2(html):\n",
" \"\"\"处理得到有用信息保存数据文件\"\"\"\n",
" final = [] # 初始化一个列表保存数据\n",
" bs = BeautifulSoup(html, \"html.parser\") # 创建BeautifulSoup对象\n",
" body = bs.body\n",
" data = body.find('div', {'id': '15d'}) # 找到div标签且id = 15d\n",
" ul = data.find('ul') # 找到所有的ul标签\n",
" li = ul.find_all('li') # 找到左右的li标签\n",
" final = []\n",
" i = 0 # 控制爬取的天数\n",
" for day in li: # 遍历找到的每一个li\n",
" if i < 8:\n",
" temp = [] # 临时存放每天的数据\n",
" date = day.find('span', {'class': 'time'}).string # 得到日期\n",
" date = date[date.index('') + 1:-2] # 取出日期号\n",
" temp.append(date)\n",
" weather = day.find('span', {'class': 'wea'}).string # 找到天气\n",
" temp.append(weather)\n",
" tem = day.find('span', {'class': 'tem'}).text # 找到温度\n",
" temp.append(tem[tem.index('/') + 1:-1]) # 找到最低气温\n",
" temp.append(tem[:tem.index('/') - 1]) # 找到最高气温\n",
" wind = day.find('span', {'class': 'wind'}).string # 找到风向\n",
" if '转' in wind: # 如果有风向变化\n",
" temp.append(wind[:wind.index('转')])\n",
" temp.append(wind[wind.index('转') + 1:])\n",
" else: # 如果没有风向变化,前后风向一致\n",
" temp.append(wind)\n",
" temp.append(wind)\n",
" wind_scale = day.find('span', {'class': 'wind1'}).string # 找到风级\n",
" index1 = wind_scale.index('级')\n",
" temp.append(int(wind_scale[index1 - 1:index1]))\n",
"\n",
" final.append(temp)\n",
" return final\n",
"\n",
"\n",
"def write_to_csv(file_name, data, day=14):\n",
" \"\"\"保存为csv文件\"\"\"\n",
" with open(file_name, 'a', errors='ignore', newline='') as f:\n",
" if day == 14:\n",
" header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']\n",
" else:\n",
" header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']\n",
" f_csv = csv.writer(f)\n",
" f_csv.writerow(header)\n",
" f_csv.writerows(data)\n",
"\n",
"#数据可视化\n",
"def tem_curve(data):\n",
" \"\"\"温度曲线绘制\"\"\"\n",
" hour = list(data['小时'])\n",
" tem = list(data['温度'])\n",
" for i in range(0, 24):\n",
" if math.isnan(tem[i]) == True:\n",
" tem[i] = tem[i - 1]\n",
" tem_ave = sum(tem) / 24 # 求平均温度\n",
" tem_max = max(tem)\n",
" tem_max_hour = hour[tem.index(tem_max)] # 求最高温度\n",
" tem_min = min(tem)\n",
" tem_min_hour = hour[tem.index(tem_min)] # 求最低温度\n",
" x = []\n",
" y = []\n",
" for i in range(0, 24):\n",
" x.append(i)\n",
" y.append(tem[hour.index(i)])\n",
" plt.figure(1)\n",
" plt.plot(x, y, color='red', label='温度') # 画出温度曲线\n",
" plt.scatter(x, y, color='red') # 点出每个时刻的温度点\n",
" plt.plot([0, 24], [tem_ave, tem_ave], c='blue', linestyle='--', label='平均温度') # 画出平均温度虚线\n",
" plt.text(tem_max_hour + 0.15, tem_max + 0.15, str(tem_max), ha='center', va='bottom', fontsize=10.5) # 标出最高温度\n",
" plt.text(tem_min_hour + 0.15, tem_min + 0.15, str(tem_min), ha='center', va='bottom', fontsize=10.5) # 标出最低温度\n",
" plt.xticks(x)\n",
" plt.legend()\n",
" plt.title('一天温度变化曲线图')\n",
" plt.xlabel('时间/h')\n",
" plt.ylabel('摄氏度/℃')\n",
" plt.show()\n",
"\n",
"\n",
"def hum_curve(data):\n",
" \"\"\"相对湿度曲线绘制\"\"\"\n",
" hour = list(data['小时'])\n",
" hum = list(data['相对湿度'])\n",
" for i in range(0, 24):\n",
" if math.isnan(hum[i]) == True:\n",
" hum[i] = hum[i - 1]\n",
" hum_ave = sum(hum) / 24 # 求平均相对湿度\n",
" hum_max = max(hum)\n",
" hum_max_hour = hour[hum.index(hum_max)] # 求最高相对湿度\n",
" hum_min = min(hum)\n",
" hum_min_hour = hour[hum.index(hum_min)] # 求最低相对湿度\n",
" x = []\n",
" y = []\n",
" for i in range(0, 24):\n",
" x.append(i)\n",
" y.append(hum[hour.index(i)])\n",
" plt.figure(2)\n",
" plt.plot(x, y, color='blue', label='相对湿度') # 画出相对湿度曲线\n",
" plt.scatter(x, y, color='blue') # 点出每个时刻的相对湿度\n",
" plt.plot([0, 24], [hum_ave, hum_ave], c='red', linestyle='--', label='平均相对湿度') # 画出平均相对湿度虚线\n",
" plt.text(hum_max_hour + 0.15, hum_max + 0.15, str(hum_max), ha='center', va='bottom', fontsize=10.5) # 标出最高相对湿度\n",
" plt.text(hum_min_hour + 0.15, hum_min + 0.15, str(hum_min), ha='center', va='bottom', fontsize=10.5) # 标出最低相对湿度\n",
" plt.xticks(x)\n",
" plt.legend()\n",
" plt.title('一天相对湿度变化曲线图')\n",
" plt.xlabel('时间/h')\n",
" plt.ylabel('百分比/%')\n",
" plt.show()\n",
"\n",
"\n",
"def main():\n",
" plt.rcParams['font.sans-serif'] = ['SimHei'] # 解决中文显示问题\n",
" plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题\n",
" data1 = pd.read_csv('weather1.csv', encoding='gb2312')\n",
" print(data1)\n",
" tem_curve(data1)\n",
" hum_curve(data1)\n",
"\n",
"if __name__ == '__main__':\n",
" main()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save