# -*- coding: utf-8 -*-
"""
Created on Fri Dec 2 21:14:38 2022

@name: Global COVID-19 data scraper
@author: 86156

Downloads the COVID-19 table from bitpush.news, keeps the rows from
'加州' through '美属萨摩亚' (the US section of the table), saves them to
``content.csv`` and draws a bar chart of the 15 rows with the highest
confirmed-case counts.
"""
import csv

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree

# ---------------------------------------------------------------------------
# Fetch the page
# ---------------------------------------------------------------------------
url = 'https://www.bitpush.news/covid19/'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page and
# crashing later with an unrelated ValueError from list.index().
response.raise_for_status()
html = response.text

# ---------------------------------------------------------------------------
# Parse the HTML and pull the table columns out with XPath
# ---------------------------------------------------------------------------
doc = etree.HTML(html)

country = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()')
person = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()')
death = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()')

# The US rows run from '加州' to '美属萨摩亚' inclusive (the original author
# noted st=101 and en=158 when the script was written).
# NOTE(review): slicing person/death with indices found in `country` assumes
# the three XPath result lists are row-aligned -- confirm against the page.
st = country.index('加州')
en = country.index('美属萨摩亚')

state = country[st:en + 1]
# Drop thousands separators so the counts can be parsed as numbers.
person = [x.replace(",", "").strip() for x in person[st:en + 1]]
death = [x.replace(",", "").strip() for x in death[st:en + 1]]

# Pack the three columns into a list of (state, person, death) tuples.
message = list(zip(state, person, death))

# ---------------------------------------------------------------------------
# Save to CSV
# ---------------------------------------------------------------------------
# Write named columns and no index. Previously the default index column and
# the auto-generated "0,1,2" header were written, and the later read_csv call
# with names=[...] turned that header into a bogus data row.
df = pd.DataFrame(message, columns=["State", "person", "death"])
df.to_csv('content.csv', index=False)

# ---------------------------------------------------------------------------
# Load the data back and select the top 15 rows by confirmed cases
# ---------------------------------------------------------------------------
df = pd.read_csv("content.csv")
# Force a numeric dtype so the sort is numeric even if a cell was not a
# clean number (such cells become NaN and sort last).
df["person"] = pd.to_numeric(df["person"], errors="coerce")
df = df.sort_values(by=['person'], ascending=False)
df1 = df.head(15)

# ---------------------------------------------------------------------------
# Plot
# ---------------------------------------------------------------------------
# Use a font with CJK glyphs so the Chinese labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (15, 5)  # figure size

# x-axis values
x = df1["State"].values
# y-axis values
y = df1["person"].values
# Draw the bar chart
plt.bar(x, y)
# x-axis label
plt.xlabel("不同地区", fontsize=14)
# y-axis label
plt.ylabel("确诊人数", fontsize=14)
plt.show()