# -*- coding: utf-8 -*-
"""
Scrape COVID-19 statistics from bitpush.news and chart them.

The script downloads the statistics page, extracts a name, a confirmed-case
count and a death count per table row, writes the ranked rows to CSV files
and shows four matplotlib figures: a bar chart and a pie chart ranked by
deaths, a two-panel line chart ranked by confirmed cases, and a horizontal
bar chart of US states ranked by deaths.

Created on Wed Jun  5 19:22:56 2024

@author: 44665
"""
import csv
import sys

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree

# Seconds to wait for the server before giving up; without a timeout
# requests may block forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def _to_int(text):
    """Convert a count written with thousands separators to an int."""
    return int(text.replace(",", ""))


def _save_and_reload(rows, path, columns, limit):
    """Write *rows* to *path* as CSV, read them back and keep the top rows.

    Args:
        rows: Iterable of tuples, already sorted in ranking order.
        path: Destination CSV file; it is overwritten.
        columns: Column names given to the DataFrame (the CSV has no header).
        limit: Maximum number of rows to keep after the first one is dropped.

    Returns:
        A pandas DataFrame holding at most *limit* rows.
    """
    with open(path, "w", newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(rows)
    frame = pd.read_csv(path, names=columns)
    # The first (highest-ranked) row is discarded, as the script has always
    # done.
    # NOTE(review): presumably that row is an aggregate such as a world
    # total - confirm, especially for the state data where it may instead be
    # the top-ranked state.
    return frame.drop(0).head(limit)


url = "https://www.bitpush.news/covid19/"
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36"
    )
}

try:
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # raise on 4xx / 5xx responses
except requests.exceptions.RequestException as e:
    print("请求错误:", e)
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)

# Parse the raw bytes with an explicit UTF-8 parser instead of relying on
# the charset requests guesses for ``response.text``.
html = response.content
parse = etree.HTMLParser(encoding='utf-8')
doc = etree.HTML(html, parser=parse)

# Extract names, confirmed-case counts and death counts from the table rows.
country = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
person = [
    _to_int(x)
    for x in doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
]
death = [
    _to_int(x)
    for x in doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')
]

# Rows from '加州' onward are treated as US states.
a = country.index('加州')
state = country[a:]
state_person = person[a:]
state_death = death[a:]

# Only the rows before index ``a`` are ranked as countries; zipping the full
# lists would let the state rows enter the country rankings.
nations = list(zip(country[:a], person[:a], death[:a]))

# Rank by deaths, persist to CSV and keep ten rows.
message = sorted(nations, key=lambda row: row[-1], reverse=True)
df = _save_and_reload(message, "content.csv", ["country", "person", "death"], 10)

# Bar chart of deaths.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese
plt.rcParams['figure.figsize'] = (10, 5)
x = df["country"].values
y = df["death"].values
colors = ['red', 'green', 'blue', 'cyan', 'grey',
          'black', 'yellow', 'orange', 'pink', 'purple']
plt.bar(x, y, color=colors)
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.title("COVID-19 死亡人数排名前十国家", fontsize=16)
for i, j in zip(x, y):
    plt.text(i, j, j, ha='center')  # print the value above each bar
plt.xticks(rotation=45)  # rotate x tick labels 45 degrees to avoid overlap
plt.show()

# Pie chart of the same data.
plt.rcParams['figure.figsize'] = (15, 10)
colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan',
           'lavender', 'purple', 'teal', 'pink', 'magenta']
# Labels and explode offsets follow the data so that slices are never
# mislabelled and a short table does not raise a length-mismatch error.
labels = list(x)
explode = [0.1 if rank == 0 else 0 for rank in range(len(y))]
plt.pie(y, explode=explode, labels=labels, colors=colors1, shadow=True,
        autopct='%.2f%%')
plt.legend()
plt.title('COVID-19 死亡人数排名前十国家')
plt.show()

# Rank by confirmed cases, overwrite the CSV and keep twenty rows.
message = sorted(nations, key=lambda row: row[-2], reverse=True)
df = _save_and_reload(message, "content.csv", ["country", "person", "death"], 20)

# Two stacked line charts: confirmed cases on top, deaths below.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (15, 12)
x = df["country"].values
y = df["death"].values
z = df['person'].values
plt.subplot(211)
plt.plot(x, z, color='crimson', marker='*', linestyle='--', label='确诊人数')
plt.legend(loc='upper right')
plt.xlabel("国家", fontsize=14)
plt.ylabel("确诊人数", fontsize=14)
plt.xticks(rotation=45)  # rotate x tick labels 45 degrees to avoid overlap
plt.title("COVID-19 确诊人数排名前二十国家确诊人数与死亡人数对比", fontsize=16)
plt.subplot(212)
plt.plot(x, y, color='teal', linestyle='-', marker='D', label='死亡人数')
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.legend(loc='upper right')
plt.xticks(rotation=45)  # rotate x tick labels 45 degrees to avoid overlap
plt.show()

# Rank the state rows by deaths, persist to their own CSV and keep ten rows.
state_message = sorted(
    zip(state, state_person, state_death),
    key=lambda row: row[-1],
    reverse=True,
)
df1 = _save_and_reload(
    state_message,
    "state_content.csv",
    ["state", "state_person", "state_death"],
    10,
)

# Horizontal bar chart of deaths per state.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (10, 5)
colors1 = ['maroon', 'crimson', 'yellow', 'olive', 'cyan',
           'lavender', 'purple', 'teal', 'pink', 'magenta']
x = df1["state"].values
y = df1["state_death"].values
plt.barh(x, y, height=0.8, color=colors1)
plt.xlabel('死亡人数')
plt.ylabel('州')
plt.title('美国COVID-19 死亡人数排名前十的州')
plt.show()