"""Weather scraping and plotting helpers.

This is the code cell of the notebook ``天气——吴庭顺.ipynb``. It contains:

* scrapers that pull hourly and multi-day forecast rows out of forecast
  pages (``getHTMLtext``, ``get_content``, ``get_content2``),
* a CSV writer for those rows (``write_to_csv``),
* plotting helpers for the hourly temperature and relative-humidity curves
  (``tem_curve``, ``hum_curve``), and
* ``main``, which reads ``weather1.csv`` and draws both curves.
"""

import csv
import json
import math
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# One or more digits immediately followed by the "级" (scale) suffix.
_WIND_SCALE_RE = re.compile(r'(\d+)级')


def getHTMLtext(url):
    """Fetch ``url`` and return the decoded response body.

    The request uses a 30 second timeout and the response is decoded with
    the encoding detected from its content.

    Returns:
        The page text on success, or the single-space string ``" "`` when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
    except requests.RequestException:
        # Only network/HTTP failures are treated as "access error"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print("访问错误")
        return " "
    print("成功访问")
    return r.text


def _parse_wind_scale(text):
    """Return the integer that directly precedes the first ``级`` in ``text``.

    All digits are taken, so a two-digit scale is not truncated to its last
    digit. For a range, the number next to ``级`` (the upper bound) is used.

    Raises:
        ValueError: if ``text`` contains no ``<digits>级`` sequence.
    """
    match = _WIND_SCALE_RE.search(text)
    if match is None:
        raise ValueError(f"no wind scale found in {text!r}")
    return int(match.group(1))


def get_content(html):
    """Extract the current-day hourly rows and the multi-day forecast rows.

    No validation of the page structure is done: if an expected element is
    missing, the error raised by the underlying lookup propagates.

    Args:
        html: Page source containing a ``div`` with id ``7d`` and at least
            three ``div`` elements of class ``left-div``.

    Returns:
        A ``(final_day, final)`` tuple.

        ``final_day`` holds at most 24 rows of
        ``[hour, temperature, wind direction, wind scale, precipitation,
        relative humidity, air quality]``.

        ``final`` holds one row per forecast day (list items 1 through 6 of
        the ``7d`` block; item 0 is skipped) of
        ``[day, weather, low, high, wind1, wind2, wind scale]``. ``high`` is
        ``None`` when the page carries no high temperature.
    """
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    data = body.find('div', {'id': '7d'})

    # --- Hourly observations for the current day -------------------------
    data2 = body.find_all('div', {'class': 'left-div'})
    text = data2[2].find('script').string
    # Drop everything up to and including the first '=' and the trailing two
    # characters so that the remainder can be parsed as JSON.
    text = text[text.index('=') + 1:-2]
    jd = json.loads(text)
    dayone = jd['od']['od2']
    final_day = [
        [
            entry['od21'],  # hour
            entry['od22'],  # temperature
            entry['od24'],  # wind direction
            entry['od25'],  # wind scale
            entry['od26'],  # precipitation
            entry['od27'],  # relative humidity
            entry['od28'],  # air quality
        ]
        for entry in dayone[:24]  # keep at most 24 entries
    ]

    # --- Multi-day forecast ----------------------------------------------
    final = []
    ul = data.find('ul')
    li = ul.find_all('li')
    for day in li[1:7]:  # skip the first item, keep the next six
        temp = []
        date = day.find('h1').string
        temp.append(date[0:date.index('日')])  # day-of-month part only

        inf = day.find_all('p')
        temp.append(inf[0].string)  # weather description

        tem_low = inf[1].find('i').string
        high_span = inf[1].find('span')  # the high temperature may be absent
        tem_high = None if high_span is None else high_span.string
        temp.append(tem_low[:-1])  # strip the trailing unit character
        if tem_high is not None and tem_high.endswith('℃'):
            tem_high = tem_high[:-1]
        temp.append(tem_high)

        wind_titles = [span['title'] for span in inf[2].find_all('span')]
        if len(wind_titles) == 1:
            # A single direction means "no change": repeat it so the row
            # keeps both wind columns, as get_content2 does.
            wind_titles = wind_titles * 2
        temp.extend(wind_titles)

        temp.append(_parse_wind_scale(inf[2].find('i').string))
        final.append(temp)
    return final_day, final


def get_content2(html):
    """Extract forecast rows from the ``15d`` block of a page.

    Args:
        html: Page source containing a ``div`` with id ``15d``.

    Returns:
        A list with one row per list item (the first eight at most), each
        ``[day, weather, low, high, wind1, wind2, wind scale]``. When the
        wind text has no ``转`` (change) marker the same direction is stored
        in both wind columns.
    """
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    data = body.find('div', {'id': '15d'})
    ul = data.find('ul')
    li = ul.find_all('li')

    final = []
    for day in li[:8]:  # at most eight days
        temp = []
        date = day.find('span', {'class': 'time'}).string
        # Keep what follows '(' and drop the last two characters.
        temp.append(date[date.index('(') + 1:-2])

        temp.append(day.find('span', {'class': 'wea'}).string)

        tem = day.find('span', {'class': 'tem'}).text
        slash = tem.index('/')
        temp.append(tem[slash + 1:-1])  # low: after '/', unit stripped
        temp.append(tem[:slash - 1])    # high: before '/', unit stripped

        wind = day.find('span', {'class': 'wind'}).string
        if '转' in wind:
            turn = wind.index('转')
            temp.append(wind[:turn])
            temp.append(wind[turn + 1:])
        else:
            temp.append(wind)
            temp.append(wind)

        wind_scale = day.find('span', {'class': 'wind1'}).string
        temp.append(_parse_wind_scale(wind_scale))

        final.append(temp)
    return final


def write_to_csv(file_name, data, day=14):
    """Append ``data`` rows to the CSV file ``file_name``.

    The header row is written only when the file does not exist yet or is
    empty, so repeated calls do not scatter header rows through the data.

    Args:
        file_name: Path of the CSV file, opened in append mode.
        data: Iterable of rows to write.
        day: ``14`` selects the multi-day forecast header; any other value
            selects the hourly header.
    """
    if day == 14:
        header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']
    else:
        header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']

    needs_header = (
        not os.path.exists(file_name) or os.path.getsize(file_name) == 0
    )
    # NOTE(review): no encoding is passed, so the platform default is used,
    # while main() reads the file as gb2312 - confirm these agree on the
    # target machine.
    with open(file_name, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        if needs_header:
            f_csv.writerow(header)
        f_csv.writerows(data)


def _fill_missing(values):
    """Return a copy of ``values`` with NaN entries filled in.

    Each NaN takes the value of the entry before it. NaNs at the very start
    take the first non-NaN value that follows. If every entry is NaN the
    list is returned unchanged.
    """
    filled = list(values)
    first_valid = next((v for v in filled if not math.isnan(v)), None)
    if first_valid is None:
        return filled
    previous = first_valid
    for idx, value in enumerate(filled):
        if math.isnan(value):
            filled[idx] = previous
        else:
            previous = value
    return filled


def _plot_daily_curve(data, column, figure_no, line_color, mean_color,
                      label, mean_label, title, ylabel):
    """Plot one hourly series from ``data`` with mean and min/max labels.

    Args:
        data: Table with an ``小时`` (hour) column and the ``column`` series.
        column: Name of the series to plot.
        figure_no: Matplotlib figure number to draw on.
        line_color: Colour of the curve and its markers.
        mean_color: Colour of the dashed mean line.
        label: Legend label of the curve.
        mean_label: Legend label of the mean line.
        title: Figure title.
        ylabel: Y-axis label.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    hour = list(data['小时'])
    values = _fill_missing(list(data[column]))
    if not values:
        raise ValueError(f"no rows to plot for column {column!r}")

    mean = sum(values) / len(values)
    highest = max(values)
    highest_hour = hour[values.index(highest)]
    lowest = min(values)
    lowest_hour = hour[values.index(lowest)]

    # First value recorded for each hour; hours absent from the data are
    # skipped instead of raising.
    first_by_hour = {}
    for h, v in zip(hour, values):
        first_by_hour.setdefault(h, v)
    x = [h for h in range(24) if h in first_by_hour]
    y = [first_by_hour[h] for h in x]

    plt.figure(figure_no)
    plt.plot(x, y, color=line_color, label=label)
    plt.scatter(x, y, color=line_color)
    plt.plot([0, 24], [mean, mean], c=mean_color, linestyle='--',
             label=mean_label)
    plt.text(highest_hour + 0.15, highest + 0.15, str(highest),
             ha='center', va='bottom', fontsize=10.5)
    plt.text(lowest_hour + 0.15, lowest + 0.15, str(lowest),
             ha='center', va='bottom', fontsize=10.5)
    plt.xticks(x)
    plt.legend()
    plt.title(title)
    plt.xlabel('时间/h')
    plt.ylabel(ylabel)
    plt.show()


# Data visualisation
def tem_curve(data):
    """Draw the hourly temperature curve with its mean, maximum and minimum.

    Args:
        data: Table with ``小时`` (hour) and ``温度`` (temperature) columns.
    """
    _plot_daily_curve(
        data,
        column='温度',
        figure_no=1,
        line_color='red',
        mean_color='blue',
        label='温度',
        mean_label='平均温度',
        title='一天温度变化曲线图',
        ylabel='摄氏度/℃',
    )


def hum_curve(data):
    """Draw the hourly relative-humidity curve with mean, maximum and minimum.

    Args:
        data: Table with ``小时`` (hour) and ``相对湿度`` (relative humidity)
            columns.
    """
    _plot_daily_curve(
        data,
        column='相对湿度',
        figure_no=2,
        line_color='blue',
        mean_color='red',
        label='相对湿度',
        mean_label='平均相对湿度',
        title='一天相对湿度变化曲线图',
        ylabel='百分比/%',
    )


def main():
    """Read ``weather1.csv`` and show the temperature and humidity curves."""
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render CJK labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    data1 = pd.read_csv('weather1.csv', encoding='gb2312')
    print(data1)
    tem_curve(data1)
    hum_curve(data1)


if __name__ == '__main__':
    main()