# -*- coding: utf-8 -*-
"""
Created on Fri Dec 2 21:14:38 2022
@name: Global COVID-19 data scraper
@author: 86156

Download the COVID-19 table from bitpush.news, extract the rows for the
US states/territories, save them to ``content.csv`` and plot the 15
regions with the most confirmed cases as a bar chart.
"""
import csv

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree

# --- Fetch the page ---------------------------------------------------------
url = 'https://www.bitpush.news/covid19/'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page.
response.raise_for_status()
html = response.text

# --- Parse the HTML with lxml -----------------------------------------------
doc = etree.HTML(html)

# XPath queries for the name, confirmed-case and death columns of the table.
country = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')

# The US rows run from California ('加州') to American Samoa ('美属萨摩亚'),
# inclusive.  The original author noted st=101 and en=158 when this was
# written.  list.index raises ValueError if either name is missing.
st = country.index('加州')
en = country.index('美属萨摩亚')

# NOTE(review): the same indices are applied to all three lists, which
# assumes the three XPath results are row-aligned -- confirm against the page.
# Strip thousands separators so the counts can be parsed as numbers later.
person = [x.replace(",", "") for x in person[st:en + 1]]
death = [x.replace(",", "") for x in death[st:en + 1]]
state = country[st:en + 1]

# Bundle the columns into a list of (state, confirmed, deaths) tuples.
message = list(zip(state, person, death))

# --- Save to CSV ------------------------------------------------------------
# to_csv writes the row index as the first column and a "0,1,2" header row.
df = pd.DataFrame(message)
df.to_csv('content.csv')

# --- Read the data back -----------------------------------------------------
# header=0 consumes the header row written above and index_col=0 consumes the
# index column; otherwise the header line is read back as a bogus data record.
df = pd.read_csv("content.csv", header=0, index_col=0)
df.columns = ["State", "person", "death"]
df = df.sort_values(by=['person'], ascending=False)
df1 = df.head(15)

# --- Plot -------------------------------------------------------------------
# Use a font that contains Chinese glyphs so the labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (15, 5)  # figure size

# x-axis values: region names
x = df1["State"].values
# y-axis values: confirmed cases
y = df1["person"].values
# Draw the bar chart.
plt.bar(x, y)
# x-axis label
plt.xlabel("不同地区", fontsize=14)
# y-axis label
plt.ylabel("确诊人数", fontsize=14)
plt.show()