You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Jen_Jon/新冠疫情各国死亡人数爬虫.py

149 lines
5.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Wed Jun 5 19:22:56 2024
@author: 44665
"""
import requests
from lxml import etree
import csv
import pandas as pd
import matplotlib.pyplot as plt
# Page that carries the COVID-19 statistics tables scraped below.
url = "https://www.bitpush.news/covid19/"
# Send a desktop-browser user agent with the request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
try:
    # requests waits indefinitely when no timeout is given, so bound the wait
    # to keep the script from hanging on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # turn HTTP 4xx/5xx responses into exceptions
except requests.exceptions.RequestException as e:
    print("请求错误:", e)
    # Stop with a non-zero status: the bare exit() helper is only injected by
    # the site module and reported success (status 0) on failure.
    raise SystemExit(1) from e
# Hand the raw bytes to lxml together with the explicit UTF-8 parser so the
# decoding does not depend on the charset requests guesses from the headers
# (response.text falls back to ISO-8859-1 when the server names no charset).
html = response.content
parse = etree.HTMLParser(encoding='utf-8')
doc = etree.HTML(html, parser=parse)
def _to_int(text):
    """Return the integer value of a count string such as "1,234"."""
    return int(text.replace(",", ""))


# Pull the region names, confirmed-case counts and death counts out of the
# table rows inside the "table_container" div.
country = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')
# Strip the thousands separators and convert every count to int exactly once;
# the state slices below reuse the converted values.
person = [_to_int(x) for x in person]
death = [_to_int(x) for x in death]
# Everything from the '加州' entry onwards is treated as the state section.
try:
    a = country.index('加州')
except ValueError:
    raise ValueError(
        "marker '加州' not found among the scraped names; "
        "the page layout or its encoding may have changed"
    ) from None
state = country[a:]
state_person = person[a:]
state_death = death[a:]
# NOTE(review): country/person/death still contain the rows from index `a`
# onwards, so later rankings built from them include the state rows too —
# confirm whether that is intended.
# Combine names with their counts and rank the rows by deaths, highest first.
message = list(zip(country, person, death))
message.sort(key=lambda row: row[2], reverse=True)
# Save the ranked rows to content.csv.
with open("content.csv", "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(message)
# Load the file back into a DataFrame, discard the first row and keep the
# next ten.
# NOTE(review): row 0 is presumably an aggregate entry — confirm against the page.
df = pd.read_csv("content.csv", names=["country", "person", "death"])
df = df.drop(0).head(10)
# Bar chart of the ten retained rows.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'figure.figsize': (10, 5),
})
x = df["country"].values
y = df["death"].values
z = df['person'].values
colors = ['red', 'green', 'blue', 'cyan', 'grey', 'black', 'yellow', 'orange', 'pink', 'purple']
plt.bar(x, y, color=colors)
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.title("COVID-19 死亡人数排名前十国家", fontsize=16)
# Print each bar's value centred at its top.
for name, count in zip(x, y):
    plt.text(name, count, count, ha='center')
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap
plt.show()
# Pie chart of the death counts currently held in y, labelled by the names in x.
plt.rcParams['figure.figsize'] = (15, 10)
colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan', 'lavender', 'purple', 'teal', 'pink', 'magenta']
# Offset only the first wedge. The list is sized from the data because
# plt.pie requires one explode entry per wedge.
explode = [0.1 if i == 0 else 0 for i in range(len(y))]
# Take the wedge labels from the scraped names instead of a fixed list so
# they always match the values being plotted.
labels = list(x)
plt.pie(y, explode=explode, labels=labels, colors=colors1, shadow=True, autopct='%.2f%%')
plt.legend()
plt.title('COVID-19 死亡人数排名前十国家')
plt.show()
# Rebuild the combined rows, this time ranked by confirmed cases (descending).
message = list(zip(country, person, death))
message.sort(key=lambda row: row[1], reverse=True)
# Save the re-ranked rows to content.csv.
with open("content.csv", "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(message)
# Load the file back, discard the first row and keep the next twenty.
df = pd.read_csv("content.csv", names=["country", "person", "death"])
df = df.drop(0).head(20)
# Two stacked line charts: confirmed cases on top, deaths underneath.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'figure.figsize': (15, 12),
})
x = df["country"].values
y = df["death"].values
z = df['person'].values
# Upper panel: confirmed cases.
plt.subplot(2, 1, 1)
plt.plot(x, z, color='crimson', marker='*', linestyle='--', label='确诊人数')
plt.legend(loc='upper right')
plt.xlabel("国家", fontsize=14)
plt.ylabel("确诊人数", fontsize=14)
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap
plt.title("COVID-19 确诊人数排名前二十国家确诊人数与死亡人数对比", fontsize=16)
# Lower panel: deaths for the same rows.
plt.subplot(2, 1, 2)
plt.plot(x, y, color='teal', linestyle='-', marker='D', label='死亡人数')
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.legend(loc='upper right')
plt.xticks(rotation=45)  # rotate the x tick labels 45 degrees to avoid overlap
plt.show()
# Combine the state names with their counts and rank by deaths, highest first.
state_message = list(zip(state, state_person, state_death))
state_message.sort(key=lambda row: row[2], reverse=True)
# Save the ranked state rows to state_content.csv.
with open("state_content.csv", "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(state_message)
# Load the file back, discard the first row and keep the next ten.
# NOTE(review): after the descending sort, row 0 looks like the state with the
# most deaths, so dropping it may leave ranks 2-11 — confirm this is intended.
df1 = pd.read_csv("state_content.csv", names=["state", "state_person", "state_death"])
df1 = df1.drop(0).head(10)
# Horizontal bar chart of the retained state rows.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'figure.figsize': (10, 5),
})
colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan', 'lavender', 'purple', 'teal', 'pink', 'magenta']
x = df1["state"].values
y = df1["state_death"].values
plt.barh(x, y, height=0.8, color=colors1)
plt.xlabel('死亡人数')
plt.ylabel('')
plt.title('美国COVID-19 死亡人数排名前十的州')
plt.show()