You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
pbixy2zje/爬取网站.py

34 lines
1.1 KiB

import requests
import re
from bs4 import BeautifulSoup
import json
from 国外总数据 import summaryDataOut
# Fetch data: download the page. The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() prevents an HTTP
# error page from being parsed as if it were the real payload.
req = requests.get(
    'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4',
    timeout=10,
)
req.raise_for_status()
content = req.content.decode('utf-8')
soup = BeautifulSoup(content, 'html.parser')
# Filter: the data is read from the JSON <script id="captain-config"> tag.
tag = soup.find('script', attrs={'type': 'application/json', 'id': 'captain-config'})
if tag is None or tag.string is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an opaque AttributeError/TypeError further down.
    raise RuntimeError(
        "Could not find a non-empty <script id='captain-config'> tag in the "
        "downloaded page; the page layout may have changed."
    )
tagstr = tag.string  # contents of the tag as a string
tagdic = json.loads(tagstr)  # parse the JSON string into a dict
component = tagdic['component'][0]
time = component['mapLastUpdatedTime']
result = component['summaryDataOut']  # the 'summaryDataOut' entry of the first 'component' item
# Store: copy the counters from `result` onto a summaryDataOut record, attach
# the timestamp held in `time`, then print the record.
OutData = summaryDataOut()
# Every counter is passed through int() before being stored; the tuple order
# is the order in which the attributes are assigned.
for _field in (
    'confirmed',
    'confirmedRelative',
    'cured',
    'curedRelative',
    'died',
    'diedRelative',
    'curConfirm',
    'curConfirmRelative',
):
    setattr(OutData, _field, int(result[_field]))
OutData.time = time
OutData.PrintOut()