import requests
import re
from bs4 import BeautifulSoup
import json


class DataService:
    """Scrape epidemic figures from the configured page and persist them.

    The workflow (see ``process_data``) is: download the page, read the JSON
    embedded in the ``<script id="captain-config">`` tag, build one
    ``Province`` object per entry of ``caseList`` (each holding its ``City``
    objects in ``subList``), and hand the result to the ``MyDB`` instance.

    ``MyDB``, ``Province`` and ``City`` are not defined in this part of the
    file; they must be in scope when this module runs.
    """

    # Seconds to wait for the HTTP response; without a timeout ``requests``
    # may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set the target URL, open the database handle, and reset state."""
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        # NOTE(review): database credentials are hard-coded here; consider
        # moving them to configuration or environment variables.
        self.db = MyDB('localhost', 'root', 'lujian123', 'covid19_datas_guangxi')
        # Both attributes are populated by parse_target_page(); they are
        # initialised here so that they always exist on the instance.
        self.pub_date = None
        self.insideDatas = []

    def fetch_html_page(self):
        """Download the page at ``self.url`` and return it as UTF-8 text.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when no response arrives within ``REQUEST_TIMEOUT`` seconds.
        """
        res = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        return res.content.decode('utf-8')

    def parse_target_page(self, html):
        """Extract the embedded JSON payload from *html*.

        Stores ``mapLastUpdatedTime`` in ``self.pub_date`` and ``caseList``
        (a list of dicts) in ``self.insideDatas``; both are read from the
        first element of the payload's ``component`` list.
        """
        # Name the parser explicitly: letting BeautifulSoup guess emits a
        # warning and makes the result depend on which parsers are installed.
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        tagDict = json.loads(tag.string)
        first_component = tagDict['component'][0]
        # Time at which the published data was last updated.
        self.pub_date = first_component['mapLastUpdatedTime']
        # Per-province records; each one carries its cities under 'subList'.
        self.insideDatas = first_component['caseList']

    def process_str(self, s):
        """Return ``'0'`` when *s* is ``None`` or ``''``; otherwise return *s*."""
        if s is None or s == '':
            return '0'
        return s

    def _to_int(self, value):
        """Convert a raw field to ``int``, treating ``None``/``''`` as zero."""
        return int(self.process_str(value))

    def fetch_province_datas(self):
        """Build a ``Province`` object for every entry in ``self.insideDatas``.

        Every numeric field goes through ``_to_int`` so that an empty or
        ``None`` value becomes 0 instead of raising ``ValueError`` /
        ``TypeError``. Keys that were mandatory before (plain ``item[...]``
        access) still raise ``KeyError`` when absent.

        Returns:
            list: the ``Province`` objects, each with its ``City`` objects
            appended to ``subList``.
        """
        to_int = self._to_int
        all_provinces = []
        for item in self.insideDatas:  # item: dict describing one province
            province = Province()
            province.confirmed = to_int(item['confirmed'])
            province.died = to_int(item.get('died', '0'))
            province.crued = to_int(item['crued'])
            province.relativeTime = to_int(item['relativeTime'])
            province.confirmedRelative = to_int(item['confirmedRelative'])
            province.diedRelative = to_int(item['diedRelative'])
            province.curedRelative = to_int(item['curedRelative'])
            province.asymptomaticRelative = to_int(item.get('asymptomaticRelative', '0'))
            province.asymptomatic = to_int(item.get('asymptomatic', '0'))
            province.curConfirm = to_int(item['curConfirm'])
            province.curConfirmRelative = to_int(item['curConfirmRelative'])
            province.icuDisable = to_int(item['icuDisable'])
            province.area = item['area']
            province.pub_date = self.pub_date
            # City-level records nested under this province.
            for cityItem in item['subList']:  # subList: list of dicts
                city = City()
                city.city = cityItem['city']
                city.confirmed = to_int(cityItem['confirmed'])
                city.died = to_int(cityItem.get('died', '0'))
                city.crued = to_int(cityItem['crued'])
                city.confirmedRelative = to_int(cityItem['confirmedRelative'])
                city.curConfirm = to_int(cityItem.get('curConfirm', '0'))
                city.pub_date = self.pub_date
                city.province = province.area
                province.subList.append(city)
            all_provinces.append(province)
        return all_provinces

    def fetch_page_datas(self):
        """Return the objects built from the parsed page (province list)."""
        all_provinces = self.fetch_province_datas()
        return all_provinces

    def process_data(self):
        """Run the whole job: download, parse, build objects, and save."""
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_provinces = self.fetch_page_datas()
        # Save province-level data.
        self.db.save_province_datas(all_provinces)
        # Save city-level data.
        self.db.save_city_datas(all_provinces)


# Create the DataService object and run the job.
ds = DataService()
ds.process_data()