You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Thu May 26 16:41:59 2022
@author: 张舒心 保婧芝
"""
import csv #用于把爬取的数据存储为csv格式可以excel直接打开的
import time #用于对请求加延时,爬取速度太快容易被反爬
from time import sleep #同上
import random #用于对延时设置随机数,尽量模拟人的行为
import requests #用于向网站发送请求
from lxml import etree #lxml为第三方网页解析库强大且速度快
import pandas as pd
import matplotlib.pyplot as plt
# Download the statistics page and pull three parallel columns out of the
# table inside <div class="table_container">.
url = 'https://www.bitpush.news/covid19/'
# Send a desktop-browser User-Agent header with the request.
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53"}
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
# Hand lxml the raw bytes together with the explicit UTF-8 parser, so decoding
# does not depend on the charset requests guesses from the response headers.
# (Previously the parser was created but never passed to etree.HTML.)
html = response.content
parse = etree.HTMLParser(encoding='utf-8')
doc = etree.HTML(html, parser=parse)
# Row labels: text of the <span> nested in each table row's cells.
continent = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
# Confirmed cases (second cell of each row).
person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
# The counts contain thousands separators; strip the commas.
person = [x.replace(",", "") for x in person]
# Deaths (third cell of each row).
death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')
# Strip the commas here as well.
death = [x.replace(",", "") for x in death]
# Combine the columns into (label, confirmed, deaths) rows.
# NOTE(review): zip() stops at the shortest list, so this assumes the three
# XPath queries return one entry per table row -- confirm against the page.
message = list(zip(continent, person, death))
# Persist the scraped rows to CSV, then load them back into a DataFrame.
# newline="" is what the csv module asks for so that no blank line is written
# between rows on Windows; the explicit encoding matches the one read_csv uses
# below instead of relying on the platform default.
with open("pandemic.csv", "w", newline="", encoding="gbk") as f:
    w = csv.writer(f)
    w.writerows(message)
# The file is written without a header row, so column names are given here.
df = pd.read_csv("pandemic.csv", names=["continent", "person", "death"], encoding='gbk')
df.info()
# Drop the row with index label 0, keep at most the last 58 remaining rows,
# then take the first 15 of those.
df1 = df.drop(0).tail(58)
df1 = df1.head(15)
print(df1)
# Show figures inline when running under IPython/Jupyter. The notebook-only
# `%matplotlib inline` syntax is a SyntaxError in a plain .py file, so call the
# magic through the IPython API and skip it when IPython is not present.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # get_ipython is only defined inside an IPython session.
    pass
# Use a font that can render the Chinese axis labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (10, 5)  # figure size passed to matplotlib
# x-axis values: the "continent" column.
x = df1["continent"].values
# y-axis values: the "person" column.
y = df1["person"].values
# Draw the bar chart.
plt.bar(x, y)
# Set the x-axis label.
plt.xlabel("地区",fontsize=14)
# Set the y-axis label.
plt.ylabel("确诊人数",fontsize=14)
plt.show()