# -*- coding: utf-8 -*-
"""
Scrape a COVID-19 table from bitpush.news and chart the leading areas.

Created on Thu May 23 22:35:58 2024

@author: Lenovo

Steps performed when the script runs:

1. Download the page and pull the area / confirmed / death cells out of the
   table located under the element with id ``main``.
2. Save the rows to ``content.csv``.
3. Reload the CSV with pandas, drop the first row and keep the next 15.
4. Show two bar charts of confirmed cases, one bar chart of deaths and one
   pie chart of confirmed cases.
"""

import csv

import matplotlib.pyplot as plt
import numpy as np  # imported by the original script; not used below
import pandas as pd
import requests
from lxml import etree

CSV_PATH = "content.csv"
# The CSV is read back as GBK, so it is written as GBK too instead of relying
# on the platform default encoding.
CSV_ENCODING = "gbk"
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# Rows of the statistics table; the column-specific tail is appended per use.
# NOTE(review): the "tbody" step assumes the served HTML contains an explicit
# <tbody> element - confirm against the raw response.
TABLE_ROWS = '//*[@id="main"]/div[2]/div/div/div/div/div[1]/table/tbody/tr'


def _cell_texts(tree, cell_path):
    """Return the stripped text nodes matched by ``TABLE_ROWS + cell_path``."""
    return [text.strip() for text in tree.xpath(TABLE_ROWS + cell_path)]


def _show_vertical_bar(labels, values, value_label):
    """Draw and show a vertical bar chart of *values* per area label."""
    plt.bar(labels, values)
    plt.xlabel("地区", fontsize=14)
    plt.ylabel(value_label, fontsize=14)
    plt.show()


# --- Download -------------------------------------------------------------
url = "https://www.bitpush.news/covid19/"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.54"}
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page into
# empty charts.
response.raise_for_status()
html = response.text

# --- Parse ----------------------------------------------------------------
doc = etree.HTML(html)

area = _cell_texts(doc, "/td[1]/span/text()")
# Counts are rendered with thousands separators; remove them so pandas can
# parse the columns as numbers.
person = [x.replace(",", "") for x in _cell_texts(doc, "/td[2]/text()")]
death = [x.replace(",", "") for x in _cell_texts(doc, "/td[3]/text()")]
# zip() stops at the shortest list, so a column with missing cells silently
# shortens the result.
message = list(zip(area, person, death))

# --- Persist --------------------------------------------------------------
# newline="" is required by the csv module; without it Windows output gets
# an extra "\r" on every row.
with open(CSV_PATH, "w", newline="", encoding=CSV_ENCODING) as f:
    csv.writer(f).writerows(message)

# --- Load -----------------------------------------------------------------
df = pd.read_csv(CSV_PATH, names=["area", "person", "death"], encoding=CSV_ENCODING)
print(df.head())
df.info()
# NOTE(review): row 0 is skipped - presumably an aggregate/total row; confirm
# against the page. The following 15 rows are charted.
df1 = df.drop(0).head(15)

# --- Charts ---------------------------------------------------------------
# SimHei provides the CJK glyphs needed by the Chinese axis labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['figure.figsize'] = (10, 5)

# Confirmed cases, vertical bar chart.
_show_vertical_bar(df1["area"].values, df1["person"].values, "确诊人数")

# Confirmed cases, horizontal bar chart with the value printed at each bar.
x = df1["area"].values
y = df1["person"].values
colors = ['red', 'green', 'cyan', 'blue']
plt.barh(x, y, height=0.8, color=colors)
plt.yticks(x)
plt.xlabel("确诊人数", fontsize=14)
plt.ylabel("地区", fontsize=14)
for i, j in zip(x, y):
    plt.text(j, i, j)
# No legend: the bars carry no labels, so plt.legend() would only emit a
# "no artists with labels" warning.
plt.show()

# Deaths, vertical bar chart.
_show_vertical_bar(df1["area"].values, df1["death"].values, "死亡人数")

# Confirmed cases, pie chart.
labels = df1["area"].values
sizes = df1["person"].values
# Pull out only the first wedge. The tuple is sized from the data because
# plt.pie rejects an explode sequence whose length differs from the values.
explode = [0.1 if index == 0 else 0 for index in range(len(sizes))]
plt.pie(sizes, explode, labels=labels, autopct='%.1f%%')
plt.title("美国疫情确诊人数排名 top15 地区情况")
plt.show()