From 41f84ae875f7ecb854eeccda6206bf971909c9a6 Mon Sep 17 00:00:00 2001 From: hnu202309010318 <1438377688@qq.com> Date: Thu, 6 Jun 2024 22:39:47 +0800 Subject: [PATCH] =?UTF-8?q?Delete=20'=E7=96=AB=E6=83=85=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=88=AC=E5=8F=96.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 疫情数据爬取.py | 142 ------------------------------------------ 1 file changed, 142 deletions(-) delete mode 100644 疫情数据爬取.py diff --git a/疫情数据爬取.py b/疫情数据爬取.py deleted file mode 100644 index ff0df47..0000000 --- a/疫情数据爬取.py +++ /dev/null @@ -1,142 +0,0 @@ -import requests -from lxml import etree -import csv -import pandas as pd -import matplotlib.pyplot as plt -# 步骤一(替换sans-serif字体) -plt.rcParams['font.sans-serif'] = ['SimHei'] -# 步骤二(解决坐标轴负数的负号显示问题) -plt.rcParams['axes.unicode_minus'] = False -# 设置请求头信息 -headers = { - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" -} - -url = "https://www.bitpush.news/covid19/" - -try: - # 发起 GET 请求 - response = requests.get(url, headers=headers) - response.raise_for_status() - response.encoding = response.apparent_encoding # 确保编码正确 - html = response.text -except requests.RequestException as e: - print(f"请求出错: {e}") - html = "" - -if html: - # 解析 HTML - doc = etree.HTML(html) - - # 获取国家名称 - country= doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()') - # 获取确诊人数 - person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()') - person = [x.replace(",", "") for x in person] - # 获取死亡人数 - death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()') - death = [x.replace(",", "") for x in death] - - # 组合数据 - message = list(zip(country, person, death)) - # 按死亡人数排序 - message1 = sorted(message, key=lambda x: int(x[-1]), reverse=True) - - # 保存数据到CSV文件 - with open("content1.csv", "w", newline='', encoding='utf-8') as f: - w = csv.writer(f) - w.writerow(["country", "person", "death"]) # 添加表头 - w.writerows(message1) - - # 读取数据 - df = pd.read_csv("content1.csv") - df1 = df.head(10) - - # 绘制柱状图 - plt.figure(figsize=(12, 6)) - plt.bar(df1["country"], df1["death"].astype(int)) - plt.xlabel("国家", fontsize=14) - plt.ylabel("死亡人数", fontsize=14) - plt.title("COVID-19死亡人数前10的国家", fontsize=16) - - # 显示每个柱的数值 - for i, v in enumerate(df1["death"].astype(int)): - plt.text(i, v + 0.02 * max(df1["death"].astype(int)), str(v), ha='center', fontsize=12) - - plt.xticks(rotation=45) - plt.tight_layout() - plt.show() - # 按确诊人数排序 - message2 = sorted(message, key=lambda x: int(x[-2]), reverse=True) - - # 保存数据到CSV文件 - with open("content2.csv", "w", newline='', encoding='utf-8') as f: - w = csv.writer(f) - w.writerow(["country", "person", "death"]) # 添加表头 - w.writerows(message2) - - # 读取数据 - df = pd.read_csv("content2.csv") - df2 = df.head(10) - - # 绘制柱状图 - plt.figure(figsize=(12, 6)) - plt.bar(df2["country"], df2["person"].astype(int)) - plt.xlabel("国家", fontsize=14) - plt.ylabel("确诊人数", fontsize=14) - plt.title("COVID-19确诊人数前10的国家", fontsize=16) - - # 显示每个柱的数值 - for i, v in enumerate(df2["person"].astype(int)): - plt.text(i, v + 0.02 * max(df2["death"].astype(int)), str(v), ha='center', fontsize=12) - - plt.xticks(rotation=45) - plt.tight_layout() - plt.show() - # 获取州的名称 - a=country.index('加州') - zhou=country[a:] - # 获取确诊人数 - person1=person[a:] - # 获取死亡人数 - death1=death[a:] - #组合数据 - message0=list(zip(zhou,person1,death1)) - # 按死亡人数排序 - message3=sorted(message0, key=lambda x: int(x[-1]), reverse=True) - # 保存数据到CSV文件 - with open("content3.csv", "w", newline='', encoding='utf-8') as f: - w = csv.writer(f) - w.writerow(["zhou", "person1", "death1"]) # 添加表头 - w.writerows(message3) - # 读取数据 - df = pd.read_csv("content3.csv") - df3 = df.head(10) - # 绘制折线图 - plt.figure(figsize=(12, 6)) - plt.plot(df3["zhou"], df3["death1"].astype(int), marker='o') - plt.xlabel("州", fontsize=14) - plt.ylabel("死亡人数", fontsize=14) - plt.title("美国COVID9死亡人数前十的州", fontsize=16) - # 显示每个点的数值 - for i, v in enumerate(df3["death1"].astype(int)): - plt.text(i, v + 0.02 * max(df3["death1"].astype(int)), str(v), ha='center', fontsize=12) - # 按确诊人数排序 - message4=sorted(message0, key=lambda x: int(x[-2]), reverse=True) - # 保存数据到CSV文件 - with open("content4.csv", "w", newline='', encoding='utf-8') as f: - w = csv.writer(f) - w.writerow(["zhou", "person1", "death1"]) # 添加表头 - w.writerows(message4) - # 读取数据 - df = pd.read_csv("content4.csv") - df4 = df.head(10) - # 绘制折线图 - plt.figure(figsize=(12, 6)) - plt.plot(df3["zhou"], df3["person1"].astype(int), marker='o') - plt.xlabel("州", fontsize=14) - plt.ylabel("确诊人数", fontsize=14) - plt.title("美国COVID9确诊人数前十的州", fontsize=16) - # 显示每个点的数值 - for i, v in enumerate(df3["person1"].astype(int)): - plt.text(i, v + 0.02 * max(df3["death1"].astype(int)), str(v), ha='center', fontsize=12) \ No newline at end of file