You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.5 KiB
99 lines
3.5 KiB
import requests
|
|
import re
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
|
|
class DataService:
    """Fetch the Baidu epidemic page, parse it, and persist the figures.

    The page embeds a JSON document in a ``<script id="captain-config">``
    tag.  ``parse_target_page`` reads the update time and the per-province
    case list from it, ``fetch_province_datas`` turns that list into
    ``Province`` / ``City`` objects, and ``process_data`` runs the whole
    pipeline and hands the result to the database layer.
    """

    DEFAULT_URL = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'

    def __init__(self, url=DEFAULT_URL, db=None):
        """Set up the source URL and the database handle.

        Args:
            url: Page to scrape; defaults to the Baidu epidemic page.
            db: Object exposing ``save_province_datas`` and
                ``save_city_datas``.  When omitted, a ``MyDB`` connection
                is created with the original built-in settings.
        """
        self.url = url
        if db is None:
            # NOTE(review): credentials are hard-coded in source; prefer
            # passing ``db`` in or loading these from configuration.
            db = MyDB('localhost', 'root', 'lujian123', 'covid19_datas_guangxi')
        self.db = db

    # Fetch the web page
    def fetch_html_page(self, timeout=10):
        """Download ``self.url`` and return its body decoded as UTF-8.

        Args:
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the process forever.

        Raises:
            requests.RequestException: On connection errors, timeouts, or
                a non-success HTTP status.
        """
        res = requests.get(self.url, timeout=timeout)
        # Fail here rather than trying to parse an error page later.
        res.raise_for_status()
        return res.content.decode('utf-8')

    # Parse the web page
    def parse_target_page(self, html):
        """Extract the update time and the case list from the page HTML.

        Stores the results on ``self.pub_date`` and ``self.insideDatas``.

        Raises:
            ValueError: If the ``captain-config`` script tag is missing or
                empty, or its content is not valid JSON.
            KeyError, IndexError: If the JSON lacks the expected entries.
        """
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        if tag is None or tag.string is None:
            raise ValueError(
                "script tag with id 'captain-config' not found or empty"
            )
        tagDict = json.loads(tag.string)
        component = tagDict['component'][0]

        # Time the data was last updated
        self.pub_date = component['mapLastUpdatedTime']

        # Per-province entries, each carrying its cities: [dict, dict, ...]
        self.insideDatas = component['caseList']

    # Normalise a string
    def process_str(self, s):
        """Return ``'0'`` when *s* is ``None`` or ``''``; otherwise *s* unchanged."""
        return '0' if s is None or s == '' else s

    def _to_int(self, value):
        """Convert a payload value to ``int``, treating ``None``/``''`` as 0."""
        return int(self.process_str(value))

    def _build_city(self, cityItem, province):
        """Create one ``City`` from its payload dict, linked to *province*."""
        city = City()
        city.city = cityItem['city']
        city.confirmed = self._to_int(cityItem['confirmed'])
        city.died = self._to_int(cityItem.get('died', '0'))
        # 'crued' (sic) is the key this code reads from the payload.
        city.crued = self._to_int(cityItem['crued'])
        city.confirmedRelative = self._to_int(cityItem['confirmedRelative'])
        city.curConfirm = self._to_int(cityItem.get('curConfirm', '0'))
        city.pub_date = self.pub_date
        city.province = province.area
        return city

    # Extract the data of every province
    def fetch_province_datas(self):
        """Build ``Province`` objects (with their cities) from ``self.insideDatas``.

        Requires ``parse_target_page`` to have run first.  Numeric fields
        that arrive as ``None`` or an empty string are stored as 0, the
        same way for provinces and cities.

        Returns:
            list: One ``Province`` per entry of the case list; each has its
            ``City`` objects appended to ``subList``.

        Raises:
            KeyError: If a required key is absent from an entry.
        """
        all_provinces = []
        for item in self.insideDatas:
            # item: dict describing a single province
            province = Province()
            province.confirmed = self._to_int(item['confirmed'])
            province.died = self._to_int(item.get('died', '0'))
            province.crued = self._to_int(item['crued'])
            province.relativeTime = self._to_int(item['relativeTime'])
            province.confirmedRelative = self._to_int(item['confirmedRelative'])
            province.diedRelative = self._to_int(item['diedRelative'])
            province.curedRelative = self._to_int(item['curedRelative'])
            province.asymptomaticRelative = self._to_int(
                item.get('asymptomaticRelative', '0')
            )
            province.asymptomatic = self._to_int(item.get('asymptomatic', '0'))
            province.curConfirm = self._to_int(item['curConfirm'])
            province.curConfirmRelative = self._to_int(item['curConfirmRelative'])
            province.icuDisable = self._to_int(item['icuDisable'])
            province.area = item['area']
            province.pub_date = self.pub_date

            # Extract the data of every city in this province
            for cityItem in item['subList']:  # subList: [dict, dict, ...]
                province.subList.append(self._build_city(cityItem, province))

            all_provinces.append(province)
        return all_provinces

    # Turn the parsed content into objects
    def fetch_page_datas(self):
        """Return the province objects built from the parsed page."""
        return self.fetch_province_datas()

    # Business entry point
    def process_data(self):
        """Run the full pipeline: download, parse, build objects, save."""
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_provinces = self.fetch_page_datas()

        # Save the province-level data
        self.db.save_province_datas(all_provinces)
        # Save the city-level data
        self.db.save_city_datas(all_provinces)
|
|
# Instantiate the service and run one complete fetch / parse / store pass.
ds = DataService()
ds.process_data()