diff --git a/crawlbug(patrick).py b/crawlbug(patrick).py new file mode 100644 index 0000000..3c54e69 --- /dev/null +++ b/crawlbug(patrick).py @@ -0,0 +1,54 @@ + +import requests +from lxml import etree +url = "https://www.bitpush.news/covid19/" +headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" +} +response = requests.get(url, headers=headers) +html = response.text +#print(html) +parse = etree.HTMLParser(encoding='utf-8') +doc = etree.HTML(html) +# states +zhou = doc.xpath('//div[@class="table_container"]//tbody/tr/td/span/text()') +print(zhou) +ren = doc.xpath('//div[@class="table_container"]//tbody/tr/td[2]/text()') +print(ren) +ren = [m.replace(",", "") for m in ren] +print(ren) +siwang = doc.xpath('//div[@class="table_container"]//tbody/tr/td[3]/text()') +print(ren) +siwang = [m.replace(",", "") for m in siwang] +message = list(zip(zhou, ren, siwang)) +import csv +with open("content.csv", "w") as f: + w = csv.writer(f) + w.writerows(message) +import pandas as pd + +#读取数据 +df = pd.read_csv("content.csv", names=["zhou", "ren", "siwang"]) +df.head() +print(df) +for i in range(101): + df.drop([i],inplace=True) +print(df) +df.sort_values(by=['ren'],ascending=False,ignore_index=True) +print(df) +df=df.iloc[0:15] +print(df) +#作图 +import matplotlib.pyplot as plt +# 设置中文显示 +plt.rcParams['font.sans-serif'] = ['SimHei'] +plt.rcParams['figure.figsize'] = (10, 5) +m = df["zhou"].values +n = df['siwang'].values +plt.bar(m, n) +plt.xlabel("zhou") +plt.ylabel("siwang") +plt.show() + + +