|
|
|
@ -0,0 +1,149 @@
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
Created on Wed Jun 5 19:22:56 2024
|
|
|
|
|
|
|
|
|
|
@author: 44665
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from lxml import etree
|
|
|
|
|
import csv
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
# Page whose statistics tables are scraped by the rest of the script.
url = "https://www.bitpush.news/covid19/"

# Send a desktop-browser User-Agent string with the request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}

try:
    # requests waits indefinitely unless a timeout is given; bound the wait
    # so a stalled server cannot hang the script. Timeout errors derive from
    # RequestException and are therefore handled below as well.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # turn 4xx/5xx answers into an exception
except requests.exceptions.RequestException as e:
    print("请求错误:", e)
    # Nothing below can run without the page, so stop with a failure status.
    # SystemExit is raised directly because the exit() helper is injected by
    # the site module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)

# Decoded body of the page, parsed in the next step.
html = response.text
|
|
|
|
|
|
|
|
|
|
# NOTE(review): this parser is created but never passed to etree.HTML below.
parse = etree.HTMLParser(encoding='utf-8')
doc = etree.HTML(html)

# Extract the names, confirmed counts and death counts from every table row
# found under <div class="table_container">.
_rows = '//div[@class="table_container"]//tbody/tr'
country, person, death = (
    doc.xpath(_rows + cell)
    for cell in ('/td/span/text()', '/td[2]/text()', '/td[3]/text()')
)

# The entries from '加州' onward are treated as the US-state data; the same
# offset is applied to all three lists.
a = country.index('加州')
state, state_person, state_death = (
    column[a:] for column in (country, person, death)
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _to_int(text):
    """Convert a count written with thousands separators ("1,234") to an int."""
    return int(text.replace(",", ""))


# Strip the commas from the confirmed and death counts and convert them to
# integers, for the full lists and for the state-only slices.
person = [_to_int(value) for value in person]
death = [_to_int(value) for value in death]
state_person = [_to_int(value) for value in state_person]
state_death = [_to_int(value) for value in state_death]

# Build the (name, confirmed, deaths) rows for the country ranking.
# The scraped lists also hold the US-state entries from index `a` onward
# (the part that was copied into the state lists), so the lists are cut off
# at `a`; otherwise states would compete with countries in a ranking that is
# charted as "countries".
message = list(zip(country[:a], person[:a], death[:a]))

# Rank by death count, highest first.
message = sorted(message, key=lambda x: x[-1], reverse=True)
|
|
|
|
|
|
|
|
|
|
# Persist the ranked rows to a CSV file (data rows only, no header line).
with open("content.csv", "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(message)

# Load the file back into a DataFrame, supplying the column names because the
# file has no header, then discard the row labelled 0 and keep the next ten.
# NOTE(review): row 0 is the entry with the highest death count; presumably
# it is an aggregate/total row rather than a country — confirm.
df = (
    pd.read_csv("content.csv", names=["country", "person", "death"])
    .drop(0)
    .head(10)
)
|
|
|
|
|
|
|
|
|
|
# Bar chart of the death counts for the rows currently held in df.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for the Chinese labels
plt.rcParams['figure.figsize'] = (10, 5)

# Column arrays used for plotting (z is read here but not drawn in this chart).
x, y, z = (df[column].values for column in ("country", "death", "person"))

colors = ['red', 'green', 'blue', 'cyan', 'grey', 'black', 'yellow', 'orange', 'pink', 'purple']
plt.bar(x, y, color=colors)
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.title("COVID-19 死亡人数排名前十国家", fontsize=16)

# Write each bar's value at the top of the bar, centred horizontally.
for name, deaths in zip(x, y):
    plt.text(name, deaths, deaths, ha='center')

plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap
plt.show()
|
|
|
|
|
# Pie chart of the same death counts (x and y come from the bar chart above).
plt.rcParams['figure.figsize'] = (15, 10)
colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan', 'lavender', 'purple', 'teal', 'pink', 'magenta']

# Offset only the first slice; sized from the data so the chart still works
# when fewer than ten rows were scraped.
explode = [0.1 if index == 0 else 0 for index in range(len(y))]

# Label each slice with the name that belongs to its value. A fixed list of
# country names would silently mislabel slices whenever the scraped ranking
# differs from the one that list was written for.
labels = list(x)

plt.pie(y, explode=explode, labels=labels, colors=colors1, shadow=True, autopct='%.2f%%')
plt.legend()
plt.title('COVID-19 死亡人数排名前十国家')
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Rebuild the (name, confirmed, deaths) rows, this time for a ranking by
# confirmed cases.
# The scraped lists also hold the US-state entries from index `a` onward
# (the part that was copied into the state lists), so the lists are cut off
# at `a`; otherwise states would be ranked among the countries.
message = list(zip(country[:a], person[:a], death[:a]))

# Rank by confirmed count (second field), highest first.
message = sorted(message, key=lambda x: x[-2], reverse=True)

# Write the rows to the CSV file, replacing the earlier ranking.
with open("content.csv", "w", newline='', encoding='utf-8') as f:
    w = csv.writer(f)
    w.writerows(message)

# Read the CSV file back into a DataFrame; the file has no header line, so
# the column names are supplied here.
df = pd.read_csv("content.csv", names=["country", "person", "death"])
# NOTE(review): row 0 is the entry with the highest confirmed count;
# presumably it is an aggregate/total row rather than a country — confirm.
df = df.drop(0)
df = df.head(20)
|
|
|
|
|
|
|
|
|
|
# Two stacked line charts for the rows currently held in df: confirmed cases
# on top, deaths underneath.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for the Chinese labels
plt.rcParams['figure.figsize'] = (15, 12)

x, y, z = (df[column].values for column in ("country", "death", "person"))

# Upper panel: confirmed cases.
plt.subplot(2, 1, 1)
plt.plot(x, z, color='crimson', marker='*', linestyle='--', label='确诊人数')
plt.legend(loc='upper right')
plt.xlabel("国家", fontsize=14)
plt.ylabel("确诊人数", fontsize=14)
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap
plt.title("COVID-19 确诊人数排名前二十国家确诊人数与死亡人数对比", fontsize=16)

# Lower panel: deaths.
plt.subplot(2, 1, 2)
plt.plot(x, y, color='teal', linestyle='-', marker='D', label='死亡人数')
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.legend(loc='upper right')
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap

plt.show()
|
|
|
|
|
|
|
|
|
|
# Combine the state columns into (name, confirmed, deaths) rows and rank
# them by death count, highest first.
state_message = sorted(
    zip(state, state_person, state_death),
    key=lambda row: row[2],
    reverse=True,
)

# Persist the ranked rows to a CSV file (data rows only, no header line).
with open("state_content.csv", "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(state_message)

# Load the file back into a DataFrame, supplying the column names because the
# file has no header, then discard the row labelled 0 and keep the next ten.
# NOTE(review): after the descending sort, row 0 is the entry with the most
# deaths, so drop(0) removes it from the chart — confirm this is intended.
df1 = (
    pd.read_csv("state_content.csv", names=["state", "state_person", "state_death"])
    .drop(0)
    .head(10)
)
|
|
|
|
|
# Horizontal bar chart of the death counts for the rows held in df1.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for the Chinese labels
plt.rcParams['figure.figsize'] = (10, 5)

colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan', 'lavender', 'purple', 'teal', 'pink', 'magenta']

# State names go on the vertical axis, death counts on the horizontal axis.
x, y = df1["state"].values, df1["state_death"].values
plt.barh(x, y, height=0.8, color=colors1)
plt.xlabel('死亡人数')
plt.ylabel('州')
plt.title('美国COVID-19 死亡人数排名前十的州')
plt.show()
|