You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
2.8 KiB
100 lines
2.8 KiB
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 1 11:48:04 2022

@author: 2602103447
"""

##### Code window
# Import libraries
from bs4 import BeautifulSoup
|
|
import requests
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
# URL of the page that carries the COVID-19 statistics tables.
url = 'https://www.bitpush.news/covid19/'

# Issue the GET request with the requests module. The timeout keeps the
# script from hanging forever on a stalled connection, and raise_for_status()
# turns an HTTP error response into an exception instead of letting an error
# page be parsed as if it were data.
result = requests.get(url, timeout=30)
result.raise_for_status()

# Parse the document and pick the second <tbody>; the rest of the script
# treats its rows as the per-state table.
doc = BeautifulSoup(result.text, 'html.parser')
tbody = doc.find_all("tbody")
if len(tbody) < 2:
    raise RuntimeError(
        'Expected at least two <tbody> elements at %s, found %d; '
        'the page layout may have changed.' % (url, len(tbody))
    )
us = tbody[1]
table = us.find_all('tr')

s = []  # state names
c = []  # confirmed cases per state
d = []  # deaths per state

for tr in table:
    # Locate the three cells of interest first, so that rows which do not
    # carry all of them (e.g. header or spacer rows) can be skipped instead
    # of crashing with AttributeError on None.
    state_tag = tr.find('span')
    cases_tag = tr.find(attrs={'class': 'table_card_cell_col_1 table_card_cell_int_type'})
    deaths_tag = tr.find(attrs={'class': 'table_card_cell_col_2 table_card_cell_float_type'})
    if state_tag is None or cases_tag is None or deaths_tag is None:
        continue

    # get_text() is used rather than .string because .string is None whenever
    # the tag has more than one child node.
    state = state_tag.get_text(strip=True)
    # Drop thousands separators before the integer conversion.
    cases = cases_tag.get_text(strip=True).replace(',', '')
    # NOTE(review): the CSS class says "float_type" but the value is parsed
    # with int(); confirm the page never renders a decimal here.
    deaths = deaths_tag.get_text(strip=True).replace(',', '')

    s.append(state)
    c.append(int(cases))
    d.append(int(deaths))

# Assemble the scraped columns into one table: state, confirmed cases, deaths.
df = pd.DataFrame({'州': s, '确诊病例': c, '死亡人数': d})

# Top-15 rankings, largest first, for each of the two numeric columns.
by_cases = df.sort_values(by='确诊病例', ascending=False)
by_deaths = df.sort_values(by='死亡人数', ascending=False)
df1 = by_cases.head(15)
df2 = by_deaths.head(15)

# Print each ranking under its heading.
for heading, ranking in (('确诊病例top15排行', df1), ('死亡病例top15排行', df2)):
    print(heading)
    print(ranking)

# Interactive lookup of a single state's figures.
# Surrounding whitespace is stripped so that an accidental trailing space in
# the typed name does not defeat the exact-match comparison below.
state = input("请输入你要查询的州的名称: ").strip()

# Each row is (state name, confirmed cases, deaths), in column order.
for name, confirmed, dead in df.itertuples(index=False, name=None):
    if name == state:
        print("查询结果如下:")
        print('州名称:', name)
        print('确诊人数:', confirmed)
        print('死亡人数:', dead)
        print('查询完成')
        break
else:
    # for/else: runs only when the loop finished without hitting `break`,
    # i.e. no row matched, so the message is printed exactly once.
    print("抱歉,没有查到相关的信息。")

# Data visualisation.

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# .py file. Request the same behaviour through the IPython API when it is
# available, and do nothing under the regular interpreter (where
# get_ipython is not defined).
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Use the SimHei font so the Chinese labels below can be rendered
# (the font has to be installed on the machine).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (16, 5)


def _show_bar_chart(frame, value_column, title):
    """Display a bar chart of `value_column` per state.

    frame        -- DataFrame with a '州' column and `value_column`.
    value_column -- name of the numeric column; also used as the y label.
    title        -- chart title.
    """
    plt.bar(frame['州'].values, frame[value_column].values, width=0.6)
    plt.xlabel('州', fontsize=14)
    plt.ylabel(value_column, fontsize=14)
    plt.title(title)
    plt.show()


def _show_pie_chart(frame, value_column, title, filename):
    """Display a pie chart of `value_column` per state and save it.

    frame        -- DataFrame with a '州' column and `value_column`.
    value_column -- name of the numeric column that sizes the wedges.
    title        -- chart title.
    filename     -- path the figure is written to before it is shown.
    """
    plt.pie(frame[value_column].values, labels=frame['州'].values,
            autopct='%1.2f%%', radius=2)
    # The title must be set before saving, otherwise the file on disk is
    # written without it.
    plt.title(title)
    # radius=2 draws the pie beyond its axes; a tight bounding box keeps the
    # saved image from being clipped at the figure edge.
    plt.savefig(filename, dpi=200, bbox_inches='tight')
    plt.show()


# Bar chart: confirmed cases, top 15.
_show_bar_chart(df1, '确诊病例', '疫情确诊人数排名 top15')

# Bar chart: deaths, top 15.
_show_bar_chart(df2, '死亡人数', '死亡人数排名 top15')

# Pie chart: confirmed cases, top 15.
_show_pie_chart(df1, '确诊病例', '确诊人数top15饼图', "确诊人数top15饼图.jpg")

# Pie chart: deaths, top 15.
_show_pie_chart(df2, '死亡人数', '死亡人数top15饼图', "死亡人数top15饼图.jpg")