You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
covid/美国疫情爬取.py

100 lines
2.8 KiB

# -*- coding: utf-8 -*-
"""
Created on Thu Dec 1 11:48:04 2022
@author: 2602103447
"""
##### 代码窗口
#导入库
from bs4 import BeautifulSoup
import requests
import pandas as pd
import matplotlib.pyplot as plt
# Page that hosts the COVID-19 statistics tables.
url = 'https://www.bitpush.news/covid19/'
# Issue the GET request with the requests module. A timeout keeps the script
# from hanging forever on a stalled connection, and raise_for_status() turns
# an HTTP error page into an exception instead of parsing it as data.
result = requests.get(url, timeout=30)
result.raise_for_status()

# Parse the HTML and pick the second <tbody>, which this script treats as the
# table of US states.
doc = BeautifulSoup(result.text, 'html.parser')
tbody = doc.find_all("tbody")
if len(tbody) < 2:
    raise RuntimeError(
        'Expected at least two <tbody> elements at %s, found %d; '
        'the page layout may have changed.' % (url, len(tbody))
    )
us = tbody[1]
table = us.find_all('tr')

# Parallel lists: state name, confirmed cases, deaths.
s = []
c = []
d = []
for tr in table:
    # State name.
    name_tag = tr.find('span')
    # Confirmed cases.
    cases_tag = tr.find(
        attrs={'class': 'table_card_cell_col_1 table_card_cell_int_type'})
    # Deaths.
    deaths_tag = tr.find(
        attrs={'class': 'table_card_cell_col_2 table_card_cell_float_type'})
    # Rows lacking any of the three cells (e.g. a header or spacer row) are
    # skipped rather than aborting the whole scrape.
    if name_tag is None or cases_tag is None or deaths_tag is None:
        continue
    # get_text() also works when a cell wraps its text in nested tags, where
    # `.string` would be None.
    state = name_tag.get_text(strip=True)
    cases = cases_tag.get_text(strip=True).replace(',', '')
    deaths = deaths_tag.get_text(strip=True).replace(',', '')
    # int() still raises ValueError on a non-numeric cell, so bad data is
    # reported instead of being silently dropped. The three lists are only
    # extended after both conversions succeed, keeping them the same length.
    cases_count = int(cases)
    deaths_count = int(deaths)
    s.append(state)
    c.append(cases_count)
    d.append(deaths_count)
# Assemble the scraped lists into one table: state name (the column header is
# an empty string), confirmed cases, deaths.
records = list(zip(s, c, d))
column_names = ['', '确诊病例', '死亡人数']
df = pd.DataFrame(records, columns=column_names)

# Top-15 rankings, largest first, by confirmed cases and by deaths.
by_cases = df.sort_values(by=['确诊病例'], ascending=False)
by_deaths = df.sort_values(by=['死亡人数'], ascending=False)
df1 = by_cases.head(15)
df2 = by_deaths.head(15)

# Print each ranking under its heading.
for heading, ranking in (('确诊病例top15排行', df1), ('死亡病例top15排行', df2)):
    print(heading)
    print(ranking)
# Interactive lookup of a single state's figures.
# Surrounding whitespace typed by the user is ignored so that a stray space
# does not cause a false "not found".
state = input("请输入你要查询的州的名称: ").strip()
# NOTE(review): the `else` below belongs to the `for` loop, not to the `if`.
# It runs only when the loop finishes without `break`, so the "not found"
# message is printed once rather than once per non-matching row.
for row in df.values:
    name = row[0]
    # Non-string names (e.g. a missing value) can never match the query.
    if isinstance(name, str) and name.strip() == state:
        print("查询结果如下:")
        print('州名称:', name)
        print('确诊人数:', row[1])
        print('死亡人数:', row[2])
        print('查询完成')
        break
else:
    print("抱歉,没有查到相关的信息。")
# Data visualisation.
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# Python file. Request the same inline backend through the IPython API when
# running under IPython/Jupyter, and do nothing otherwise.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # Not running under IPython; keep matplotlib's default backend.

# SimHei provides the CJK glyphs used in the labels and titles.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (16, 5)

# Bar charts: top-15 confirmed cases, then top-15 deaths.
for frame, column, title in (
    (df1, '确诊病例', '疫情确诊人数排名 top15'),
    (df2, '死亡人数', '死亡人数排名 top15'),
):
    # Start a fresh figure so charts are never drawn on top of each other
    # when show() does not close the previous figure.
    plt.figure()
    x = frame[''].values
    y = frame[column].values
    plt.bar(x, y, width=0.6)
    plt.xlabel('', fontsize=14)
    plt.ylabel(column, fontsize=14)
    plt.title(title)
    plt.show()

# Pie charts: share of each state within the top 15, saved as JPEG files in
# the current working directory and then displayed.
for frame, column, title in (
    (df1, '确诊病例', '确诊人数top15饼图'),
    (df2, '死亡人数', '死亡人数top15饼图'),
):
    plt.figure()
    labels = frame[''].values
    oppcy = frame[column].values
    plt.pie(oppcy, labels=labels, autopct='%1.2f%%', radius=2)
    # Set the title before saving so it appears in the saved image.
    plt.title(title)
    # radius=2 draws the pie beyond the axes; a tight bounding box keeps the
    # whole pie and its labels inside the saved image.
    plt.savefig(title + ".jpg", dpi=200, bbox_inches='tight')
    plt.show()