|
|
|
@ -0,0 +1,142 @@
|
|
|
|
|
import requests
|
|
|
|
|
from lxml import etree
|
|
|
|
|
import csv
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
# Matplotlib text settings for the Chinese labels used by the charts below:
#   - 'font.sans-serif': replace the sans-serif font list with SimHei.
#   - 'axes.unicode_minus': turn off the Unicode minus glyph so negative
#     numbers on the axes display correctly with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
|
|
|
|
|
# Request headers: send a desktop-browser User-Agent with the page request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}

url = "https://www.bitpush.news/covid19/"

# Upper bound, in seconds, on how long the HTTP request may block.
REQUEST_TIMEOUT = 10

# Name of the row at which the U.S. state rows begin in the scraped name list;
# every row from this one onward is treated as a state.
FIRST_STATE_NAME = '加州'

# Positions of the count columns inside a (name, confirmed, deaths) row.
CONFIRMED = 1
DEATHS = 2


def fetch_html(page_url=url, request_headers=None, timeout=REQUEST_TIMEOUT):
    """Download a page and return its decoded text.

    Args:
        page_url: Address to request.
        request_headers: Headers to send; the module-level ``headers`` is
            used when this is None.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the script forever.

    Returns:
        The response body as text, or ``""`` when the request failed (the
        error is printed).
    """
    if request_headers is None:
        request_headers = headers
    try:
        response = requests.get(page_url, headers=request_headers, timeout=timeout)
        response.raise_for_status()
        # Use the encoding detected from the body so the text decodes correctly.
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException as e:
        print(f"请求出错: {e}")
        return ""


def parse_count(text):
    """Convert a scraped count such as ``"1,234"`` to an int.

    Returns:
        The integer value, or None when the text is not a number.
    """
    try:
        return int(text.replace(",", ""))
    except ValueError:
        return None


def first_state_index(names, marker=FIRST_STATE_NAME):
    """Return the index of the first state row in ``names``.

    When ``marker`` is absent, ``len(names)`` is returned so that slicing
    with the result yields every row as a country and no rows as states.
    """
    try:
        return names.index(marker)
    except ValueError:
        return len(names)


def build_rows(names, confirmed, deaths):
    """Combine three scraped columns into ``(name, confirmed, deaths)`` rows.

    Counts are converted to int. Rows whose counts are not numeric are
    skipped because they cannot be ranked.
    """
    rows = []
    for name, confirmed_text, deaths_text in zip(names, confirmed, deaths):
        confirmed_count = parse_count(confirmed_text)
        deaths_count = parse_count(deaths_text)
        if confirmed_count is None or deaths_count is None:
            continue
        rows.append((name, confirmed_count, deaths_count))
    return rows


def rank_rows(rows, value_index):
    """Return ``rows`` sorted from largest to smallest by one column."""
    return sorted(rows, key=lambda row: row[value_index], reverse=True)


def _save_csv(path, header, rows):
    """Write ``header`` followed by ``rows`` to a UTF-8 CSV file at ``path``."""
    with open(path, "w", newline='', encoding='utf-8') as f:
        w = csv.writer(f)
        w.writerow(header)
        w.writerows(rows)


def _plot_top(rows, value_index, *, kind, xlabel, ylabel, title, limit=10):
    """Draw and show a chart of the first ``limit`` rows.

    Args:
        rows: Already-ranked ``(name, confirmed, deaths)`` rows.
        value_index: Column of each row to plot.
        kind: ``"bar"`` for a bar chart; anything else draws a line chart
            with point markers.
        xlabel, ylabel, title: Text for the axes and the figure title.
        limit: Number of leading rows to draw.
    """
    top = rows[:limit]
    if not top:
        # Nothing to draw, and max() below would fail on an empty list.
        return

    labels = [row[0] for row in top]
    values = [row[value_index] for row in top]
    # Vertical gap between each data point and its numeric label: 2% of the
    # tallest value, computed once instead of on every loop iteration.
    label_offset = 0.02 * max(values)

    plt.figure(figsize=(12, 6))
    if kind == "bar":
        plt.bar(labels, values)
    else:
        plt.plot(labels, values, marker='o')
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel(ylabel, fontsize=14)
    plt.title(title, fontsize=16)

    # Print the numeric value above every bar / point.
    for i, v in enumerate(values):
        plt.text(i, v + label_offset, str(v), ha='center', fontsize=12)

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def _report(rows, value_index, csv_path, header, **plot_options):
    """Rank ``rows`` by one column, save the ranking to CSV, and chart the top."""
    ranked = rank_rows(rows, value_index)
    _save_csv(csv_path, header, ranked)
    _plot_top(ranked, value_index, **plot_options)


def main():
    """Fetch the page, then write four ranked CSV files and show four charts."""
    html = fetch_html()
    if not html:
        return

    # Parse the HTML and pull the name, confirmed and death columns.
    doc = etree.HTML(html)
    country = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
    person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
    death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')

    # zip() silently truncates to the shortest column, so make a mismatch visible.
    if not len(country) == len(person) == len(death):
        print(f"警告: 各列数据长度不一致 ({len(country)}, {len(person)}, {len(death)})")

    # Rows before the first state are countries; the rest are U.S. states.
    split = first_state_index(country)
    if split == len(country):
        print(f"警告: 未找到 {FIRST_STATE_NAME}")
    nations = build_rows(country[:split], person[:split], death[:split])
    states = build_rows(country[split:], person[split:], death[split:])

    nation_header = ["country", "person", "death"]
    state_header = ["zhou", "person1", "death1"]

    # Countries ranked by deaths, then by confirmed cases (bar charts).
    _report(nations, DEATHS, "content1.csv", nation_header, kind="bar",
            xlabel="国家", ylabel="死亡人数", title="COVID-19死亡人数前10的国家")
    _report(nations, CONFIRMED, "content2.csv", nation_header, kind="bar",
            xlabel="国家", ylabel="确诊人数", title="COVID-19确诊人数前10的国家")

    # U.S. states ranked by deaths, then by confirmed cases (line charts).
    _report(states, DEATHS, "content3.csv", state_header, kind="line",
            xlabel="州", ylabel="死亡人数", title="美国COVID-19死亡人数前十的州")
    _report(states, CONFIRMED, "content4.csv", state_header, kind="line",
            xlabel="州", ylabel="确诊人数", title="美国COVID-19确诊人数前十的州")


if __name__ == "__main__":
    main()
|