diff --git a/新冠——张嘉宇.ipynb b/新冠——张嘉宇.ipynb new file mode 100644 index 0000000..a51edd9 --- /dev/null +++ b/新冠——张嘉宇.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "4c38cdbb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200\n", + "today_province_2022_04_12.csv 保存成功\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
"""Fetch per-province COVID-19 figures, save them to CSV and draw a bar chart.

The network and plotting stacks (``requests``, ``matplotlib``) are imported
inside the functions that need them, so the data-shaping helpers can be
imported and exercised without those packages or a network connection.
"""
import json
import os
import time

import pandas as pd


def get_html(Url, header, timeout=10):
    """Request ``Url`` and return the response body parsed as JSON.

    Args:
        Url: Address to fetch.
        header: Mapping of HTTP request headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever.

    Returns:
        The decoded JSON document, or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
        A failure message is printed in that case.
    """
    import requests  # imported lazily, see the module docstring

    try:
        r = requests.get(url=Url, headers=header, timeout=timeout)
        # Turn 4xx/5xx answers into an exception instead of parsing them.
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # guessed from the response headers before reading ``r.text``.
        r.encoding = r.apparent_encoding
        data_json = json.loads(r.text)
    except (requests.RequestException, ValueError) as err:
        # ``json.JSONDecodeError`` is a ``ValueError`` subclass. Only the
        # expected failures are caught, so programming errors and
        # KeyboardInterrupt still surface.
        print("爬取失败", err)
        return None

    print(r.status_code)
    return data_json


def get_data(data, info_list):
    """Flatten a list of region records into a single DataFrame.

    Args:
        data: Sequence of dicts. Each record must contain the keys named in
            ``info_list`` plus nested ``"today"`` and ``"total"`` dicts.
        info_list: Names of the top-level keys to keep as leading columns.

    Returns:
        A DataFrame holding the ``info_list`` columns, followed by the
        ``"today"`` fields prefixed with ``today_`` and the ``"total"``
        fields prefixed with ``total_``. Empty input yields an empty frame
        that carries only the ``info_list`` columns.
    """
    if not data:
        return pd.DataFrame(columns=list(info_list))

    info = pd.DataFrame(data)[info_list]
    today_data = pd.DataFrame(
        [province["today"] for province in data]
    ).add_prefix("today_")
    total_data = pd.DataFrame(
        [province["total"] for province in data]
    ).add_prefix("total_")

    return pd.concat([info, today_data, total_data], axis=1)


def save_data(data, name):
    """Write ``data`` to ``<name>_<YYYY_MM_DD>.csv`` in the working directory.

    Args:
        data: DataFrame to persist.
        name: File-name prefix; the current local date is appended.

    The file is written as UTF-8 with a byte-order mark and without the
    index column. A success or failure message is printed afterwards
    depending on whether the file exists.
    """
    file_name = f"{name}_{time.strftime('%Y_%m_%d')}.csv"

    data.to_csv(file_name, index=False, encoding="utf_8_sig")

    if os.path.exists(file_name):
        print(file_name + " 保存成功")
    else:
        print('保存失败')


def plot_case_totals():
    """Draw a stacked bar chart of hard-coded case counts for five regions.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots()``.
    """
    import matplotlib as mpl  # imported lazily, see the module docstring
    import matplotlib.pyplot as plt

    # Select a font family for the Chinese labels.
    mpl.rcParams['font.family'] = ["DengXian", "sans-serif"]

    labels = ['香港', '湖北', '吉林', '台湾', '上海']
    total_cases = [308426, 68392, 37897, 28040, 12283]
    new_cases = [251, 0, 187, 630, 917]
    # NOTE(review): these error-bar sizes look like placeholder values --
    # confirm whether error bars are wanted at all.
    total_err = [2, 3, 4, 1, 2]
    new_err = [3, 5, 2, 3, 3]
    width = 0.35

    fig, ax = plt.subplots()
    # The second series is stacked on top of the first through ``bottom``.
    ax.bar(labels, total_cases, width, yerr=total_err, label='总计')
    ax.bar(labels, new_cases, width, yerr=new_err, bottom=total_cases,
           label='新增')
    ax.set_ylabel('新冠')
    ax.set_title('2022-4-11')
    # With no arguments the legend is built from the ``label`` of each bar.
    ax.legend()
    return fig, ax


if __name__ == "__main__":
    # NetEase real-time epidemic data endpoint.
    url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"

    # Pretend to be a browser. Adjacent literals are concatenated so that no
    # source indentation ends up inside the header value.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/80.0.3987.149 Safari/537.36'
        ),
    }

    # 1. Fetch the raw, unprocessed payload.
    datas = get_html(url, headers)

    if datas is not None:
        # 2. Locate the per-province records.
        # NOTE(review): index 2 is assumed to be the China node -- confirm
        # against the live payload.
        data_province = datas["data"]["areaTree"][2]["children"]

        # 3. Flatten the province records into one table.
        all_data = get_data(data_province, ["id", "name", "lastUpdateTime"])

        # 4. Persist the table.
        save_data(all_data, "today_province")

    fig, ax = plot_case_totals()
"metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}