You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
pachong666/计算机与人工智能大作业(秦恺乐,陈哲栋小组).py

99 lines
2.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Tue Dec 28 14:59:48 2021
@author: dell
"""
##计算机与人工智能大作业汇报
# Fetch the COVID-19 statistics page with the requests library.
import requests

# Address of the page to scrape.
url = "https://www.bitpush.news/covid19/"
# Request headers: send a desktop Chrome User-Agent string.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# Issue the GET request. requests applies no timeout by default, so an
# explicit one keeps a stalled connection from hanging the script forever.
response = requests.get(url, headers=headers, timeout=30)
# Stop here on a 4xx/5xx status rather than going on to parse an error page.
response.raise_for_status()
# Body of the response, decoded to text by requests.
html = response.text
# Parse the downloaded page with lxml and pull out the table columns.
from lxml import etree

# HTML parser configured to decode its input as UTF-8.
parse = etree.HTMLParser(encoding='utf-8')
# Parse the raw response bytes with that parser so its UTF-8 setting actually
# governs decoding; the setting is not applied to an already-decoded str, and
# a parser that is never passed to etree.HTML() has no effect at all.
doc = etree.HTML(response.content, parser=parse)
# All three columns come from the rows of the second "table_container" div.
row_xpath = '(//div[@class="table_container"])[2]//tbody/tr'
# Area names.
area = doc.xpath(row_xpath + '/td/span/text()')
# Confirmed-case counts; the thousands separators (commas) are removed.
person = [x.replace(",", "") for x in doc.xpath(row_xpath + '/td[2]/text()')]
# Death counts, cleaned the same way.
death = [x.replace(",", "") for x in doc.xpath(row_xpath + '/td[3]/text()')]
# Combine the columns into a list of (area, person, death) tuples.
# Note: zip() stops at the shortest of the three lists.
message = list(zip(area, person, death))
print(message)
# Save the scraped rows with the standard-library csv module.
import csv

# Files handed to csv.writer must be opened with newline=''; otherwise an
# extra "\r" is written on platforms whose line ending is "\r\n".
# The "utf-8-sig" codec writes a BOM at the start of the file.
with open("content.csv", "w", encoding='utf-8-sig', newline='') as csv_file:
    w = csv.writer(csv_file)
    w.writerows(message)
# Read the file back as a quick check. Decoding with "utf-8-sig" too strips
# the BOM instead of leaving a stray U+FEFF at the start of the text.
with open("content.csv", 'r', encoding='utf-8-sig') as csv_file:
    data = csv_file.read()
print(data)
# Load the saved CSV into pandas for further processing.
import pandas as pd

# The file is read without a header row; the column names are supplied here.
df = pd.read_csv(
    "content.csv",
    header=None,
    names=["area", "person", "death"],
)
# Preview and summary. Outside a notebook the head() result is discarded;
# info() prints its summary itself.
df.head()
df.info()
# Keep the first 15 rows in file order (no sorting is performed here).
df1 = df.iloc[:15]
import matplotlib.pyplot as plt

# To show figures inline in Jupyter, enable:
# %matplotlib inline
# Use the SimHei font so the Chinese labels display, and set the figure size.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'figure.figsize': (10, 5),
})
# x: area names for the bars; y: confirmed-case counts for the bar heights.
x, y = df1["area"].values, df1["person"].values
# Draw the bar chart with a background grid.
plt.bar(x, height=y, color='y')
plt.grid()
# Label the x axis.
plt.xlabel("地区", fontsize=14)
# Label the y axis.
plt.ylabel("确诊人数", fontsize=14)
plt.show()