From b8587416a3445b239c27fbf28a79413af621ebac Mon Sep 17 00:00:00 2001 From: piofyvqg4 <2825myy@163.com> Date: Wed, 7 Dec 2022 16:46:45 +0800 Subject: [PATCH] ADD file via upload --- 美国疫情爬取.py | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 美国疫情爬取.py diff --git a/美国疫情爬取.py b/美国疫情爬取.py new file mode 100644 index 0000000..5aa4abf --- /dev/null +++ b/美国疫情爬取.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Dec 1 11:48:04 2022 + +@author: 2602103447 +""" + +##### 代码窗口 +#导入库 +from bs4 import BeautifulSoup +import requests +import pandas as pd +import matplotlib.pyplot as plt + +# 请求的url +url = 'https://www.bitpush.news/covid19/' +# 使用reqeusts模快发起 GET 请求 +result = requests.get(url) +#逐一解析数据 +doc = BeautifulSoup(result.text, 'html.parser') +tbody = doc.find_all("tbody") +us = tbody[1] +table = us.find_all('tr') +s = [] +c = [] +d = [] + +for tr in table: + #爬取州 + state = tr.find('span').string + #爬取确诊病例 + cases = tr.find(attrs={'class':'table_card_cell_col_1 table_card_cell_int_type'}).string.replace(',','') + #爬取死亡人数 + deaths = tr.find(attrs={'class':'table_card_cell_col_2 table_card_cell_float_type'}).string.replace(',','') + s.append(state) + c.append(int(cases)) + d.append(int(deaths)) +#查找数据 +df = pd.DataFrame(list(zip(s, c, d)), +columns =['州', '确诊病例', '死亡人数']) +df1 = df.sort_values(by=['确诊病例'], ascending=False).head(15) +df2 = df.sort_values(by=['死亡人数'], ascending=False).head(15) +print('确诊病例top15排行') +print(df1) +print('死亡病例top15排行') +print(df2) +#添加搜索州的疫情情况 +state = input("请输入你要查询的州的名称: ") +for i in df.values: + + if i[0] == state: + print("查询结果如下:") + print('州名称:',end =' ') + print(i[0]) + print('确诊人数:',end =' ') + print(i[1]) + print('死亡人数:',end =' ') + print(i[2]) + print('查询完成') + break +else: + print("抱歉,没有查到相关的信息。") + + +#数据可视化 +%matplotlib inline +plt.rcParams['font.sans-serif'] = ['SimHei'] +plt.rcParams['figure.figsize'] = (16,5) +#绘制确诊人数柱形图 +x = df1['州'].values +y = df1['确诊病例'].values +plt.bar(x,y,width=0.6) +plt.xlabel('州',fontsize = 14) +plt.ylabel('确诊病例',fontsize = 14) +plt.title('疫情确诊人数排名 top15') +plt.show() +#绘制死亡人数柱形图 +x = df2['州'].values +y = df2['死亡人数'].values +plt.bar(x,y,width=0.6) +plt.xlabel('州',fontsize = 14) +plt.ylabel('死亡人数',fontsize = 14) +plt.title('死亡人数人数排名 top15') +plt.show() + +#绘制确诊人数top15饼图 +labels = df1['州'].values +oppcy = df1['确诊病例'].values +plt.pie(oppcy,labels=labels,autopct='%1.2f%%',radius=2) +plt.savefig("确诊人数top15饼图.jpg",dpi=200) +plt.title('确诊人数top15饼图') +plt.show() + +#绘制死亡人数top15饼图 +labels = df2['州'].values +oppcy = df2['死亡人数'].values +plt.pie(oppcy,labels=labels,autopct='%1.2f%%',radius=2) +plt.savefig("死亡人数top15饼图.jpg",dpi=200) +plt.title('死亡人数top15饼图') +plt.show() \ No newline at end of file