You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

103 lines
4.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

##### Scraper section: fetch the COVID-19 table, save it as CSV, and plot
##### the death counts of ten countries as a bar chart.
import csv  # standard-library module used to write the rows to a file

import matplotlib.pyplot as plt  # plotting
import pandas as pd  # used to read the CSV back
import requests
from lxml import etree

url = "https://www.bitpush.news/covid19/"  # URL to request
# Request headers: send a browser-like user agent.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}
# Issue the GET request. The timeout stops the script from hanging forever
# on an unresponsive server, and raise_for_status() stops it from parsing an
# HTTP error page as if it were the data table.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.text  # body of the response
doc = etree.HTML(html)

# XPath uses path expressions to select nodes or node sets in a document;
# nodes are selected by following a path or steps.
row_xpath = '//div[@class="table_container"]//tbody/tr'
country = doc.xpath(row_xpath + '/td/span/text()')
# Strip thousands separators so the counts can be read back as numbers.
person = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[2]/text()')]
death = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[3]/text()')]
# Pack the three columns into rows and materialise them as a list.
message = list(zip(country, person, death))

# Write with the same encoding the reader below expects ("gbk"); relying on
# the locale default only round-trips on machines whose default is GBK.
# newline="" is what the csv module requires to avoid blank rows on Windows.
with open("content.csv", "w", newline="", encoding="gbk") as f:
    w = csv.writer(f)
    w.writerows(message)

df = pd.read_csv("content.csv", names=["country", "person", "death"], encoding="gbk")
# The first row is the worldwide total, so drop it; show 10 countries.
df1 = df.drop(0).head(10)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese labels
plt.rcParams['figure.figsize'] = (10, 5)  # figure size
x = df1["country"].values
y = df1["death"].values
plt.bar(x, y)
plt.xlabel("国家", fontsize=14)
plt.ylabel("死亡人数", fontsize=14)
plt.show()
# Exercise 2: scrape the same table, sort the rows by infection count
# (descending), save them as CSV, and plot the top 15 as a bar chart.
import csv

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree

url = "https://www.bitpush.news/covid19/"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}
# The timeout stops the script from hanging on an unresponsive server;
# raise_for_status() fails fast instead of parsing an HTTP error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.text
doc = etree.HTML(html)

row_xpath = '//div[@class="table_container"]//tbody/tr'
country = doc.xpath(row_xpath + '/td/span/text()')
# Strip thousands separators so the counts can be parsed as integers.
person = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[2]/text()')]
death = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[3]/text()')]

# Build one [country, person, death] list per row. zip() stops at the
# shortest column instead of raising IndexError when the scraped columns
# differ in length.
message = [[c, p, d] for c, p, d in zip(country, person, death)]
# Sort by infection count, largest first. int() parses the digits without
# executing scraped page text the way eval() would.
message = sorted(message, key=lambda row: int(row[1]), reverse=True)
print(message)

# Write with the same encoding the reader below expects ("gbk"); relying on
# the locale default only round-trips on machines whose default is GBK.
# newline="" is what the csv module requires to avoid blank rows on Windows.
with open("content.csv", "w", newline="", encoding="gbk") as f:
    w = csv.writer(f)
    w.writerows(message)

df = pd.read_csv("content.csv", names=["country", "person", "death"], encoding="gbk")
# Drop the first (largest) row, presumably the worldwide total (confirm
# against the page), and keep the next 15.
df1 = df.drop(0).head(15)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese labels
plt.rcParams['figure.figsize'] = (15, 5)
x = df1["country"].values
y = df1["person"].values
plt.bar(x, y)
plt.xlabel("国家", fontsize=14)
plt.ylabel("感染人数", fontsize=14)
plt.show()
# Exercise 1: same pipeline as the other sections, but the table rows are
# located with an absolute XPath from the document root.
import csv

import matplotlib.pyplot as plt
import pandas as pd
import requests
from lxml import etree

url = "https://www.bitpush.news/covid19/"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"}
# The timeout stops the script from hanging on an unresponsive server;
# raise_for_status() fails fast instead of parsing an HTTP error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.text
doc = etree.HTML(html)

# Shared absolute prefix of the three column queries; the resulting XPath
# strings are exactly the ones the original code spelled out in full.
row_xpath = '/html/body/div/div/div[2]/div[2]/div/div/div/div//tbody/tr'
country = doc.xpath(row_xpath + '/td/span/text()')
# Strip thousands separators so the counts can be parsed as integers.
person = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[2]/text()')]
death = [text.replace(",", "") for text in doc.xpath(row_xpath + '/td[3]/text()')]

# Build one [country, person, death] list per row. zip() stops at the
# shortest column instead of raising IndexError when the scraped columns
# differ in length.
message = [[c, p, d] for c, p, d in zip(country, person, death)]
# Sort by infection count, largest first. int() parses the digits without
# executing scraped page text the way eval() would.
message = sorted(message, key=lambda row: int(row[1]), reverse=True)
print(message)

# Write with the same encoding the reader below expects ("gbk"); relying on
# the locale default only round-trips on machines whose default is GBK.
# newline="" is what the csv module requires to avoid blank rows on Windows.
with open("content.csv", "w", newline="", encoding="gbk") as f:
    w = csv.writer(f)
    w.writerows(message)

df = pd.read_csv("content.csv", names=["country", "person", "death"], encoding="gbk")
# Drop the first (largest) row, presumably the worldwide total (confirm
# against the page), and keep the next 15.
df1 = df.drop(0).head(15)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese labels
plt.rcParams['figure.figsize'] = (15, 5)
x = df1["country"].values
y = df1["person"].values
plt.bar(x, y)
plt.xlabel("地区", fontsize=14)
plt.ylabel("感染人数", fontsize=14)
plt.show()