|
|
import pymysql
|
|
|
import requests
|
|
|
from bs4 import BeautifulSoup
|
|
|
import json
|
|
|
|
|
|
class ForeignCountry:
    """One foreign country's daily epidemic figures, plus the publish date."""

    def __init__(self):
        # Numeric counters default to 0; textual fields default to ''.
        self.died = 0
        self.confirmed = 0
        self.crued = 0
        self.area = ''
        self.curConfirm = 0
        self.confirmedRelative = 0
        self.pub_date = ''

    def __str__(self):
        return (
            f'area: {self.area}, died: {self.died}, '
            f'confirmed: {self.confirmed}, crued: {self.crued}, '
            f'curConfirm: {self.curConfirm}, '
            f'confirmedRelative: {self.confirmedRelative}, '
            f'pub_date: {self.pub_date}'
        )

    def get_fcountry_info_tuple(self):
        """Return the fields as a tuple, ordered for the DB insert columns."""
        return (
            self.died,
            self.confirmed,
            self.crued,
            self.area,
            self.curConfirm,
            self.confirmedRelative,
            self.pub_date,
        )
|
|
|
|
|
|
class MyDB:
    """Minimal pymysql wrapper that persists foreign daily epidemic rows."""

    def __init__(self, host, user, passwd, db):
        # pymysql >= 1.0 removed positional connect() arguments; pass them
        # by keyword so the code works with current releases.
        self.conn = pymysql.connect(host=host, user=user, passwd=passwd, db=db)
        self.cursor = self.conn.cursor()

    # Build the executemany() parameter list from ForeignCountry objects.
    def get_fcountry_args_list(self, all_foreign_countries):
        """Return one parameter tuple per country, in insert-column order."""
        return [country.get_fcountry_info_tuple()
                for country in all_foreign_countries]

    # Save the daily epidemic data for countries outside China.
    def save_outside_daily_datas(self, all_foreign_countries):
        """Replace today's foreign_daily_datas rows with the given countries.

        Deletes rows already stored for the same date (matched on the
        pub_date prefix of the first country), then bulk-inserts the fresh
        data. DB errors are printed and swallowed, keeping the original
        best-effort behaviour.
        """
        if not all_foreign_countries:
            # Nothing to store; also avoids IndexError on [0] below.
            return

        curdate = all_foreign_countries[0].pub_date
        # Delete today's existing rows first. Parameterized query: the
        # driver quotes the pattern, avoiding SQL injection / bad quoting.
        sql = 'delete from foreign_daily_datas where pub_date like %s'
        try:
            self.cursor.execute(sql, (curdate[:10] + '%',))
            self.conn.commit()
        except Exception as e:
            print(e)

        sql = ('insert into foreign_daily_datas(died, confirmed, crued, area, '
               'curConfirm, confirmedRelative, pub_date) '
               'values(%s, %s, %s, %s, %s, %s, %s)')
        res = self.get_fcountry_args_list(all_foreign_countries)
        print('+++ foreign_daily_datas, data len: %d' % len(res))
        try:
            self.cursor.executemany(sql, res)
            self.conn.commit()
        except Exception as e:
            print(e)
        print('+++ foreign_daily_datas is over.')

    def __del__(self):
        # getattr guard: __init__ may have failed before self.conn existed.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()
|
|
|
|
|
|
|
|
|
class DataService:
    """Fetches Baidu's epidemic page, parses it and stores foreign data."""

    def __init__(self):
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        self.db = MyDB('localhost', 'root', '1999z5g24x', 'text_data_increasing')

    # Fetch the raw page.
    def fetch_html_page(self):
        """Return the page body decoded as UTF-8 text."""
        # A timeout keeps the scraper from hanging forever on a dead server.
        res = requests.get(self.url, timeout=30)
        return res.content.decode('utf-8')

    # Parse the target page.
    def parse_target_page(self, html):
        """Extract the update time and per-country data from embedded JSON.

        Sets ``self.pub_date`` and ``self.outsideDatas`` as side effects.
        """
        # Explicit parser avoids bs4's "no parser specified" warning and
        # keeps parsing consistent across environments.
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        config = json.loads(tag.string)

        # Timestamp of the last data update.
        self.pub_date = config['component'][0]['mapLastUpdatedTime']
        # Per-country data for countries outside China.
        self.outsideDatas = config['component'][0]['caseOutsideList']

    # Normalize a string field.
    def process_str(self, s):
        """Return '0' for None/empty input, otherwise the string unchanged."""
        return '0' if s is None or s == '' else s

    def getOrElse(self, target, key):
        """Return target[key], or '' when the key is absent or maps to None."""
        val = target.get(key)
        return val if val is not None else ''

    # Parse the foreign per-country data.
    def parse_outside_daily_datas(self):
        """Build one ForeignCountry object per entry in self.outsideDatas."""
        all_foreign_countries = []
        for item in self.outsideDatas:
            country = ForeignCountry()
            country.died = self.getOrElse(item, 'died')
            country.confirmed = self.getOrElse(item, 'confirmed')
            country.crued = self.getOrElse(item, 'crued')
            country.area = self.getOrElse(item, 'area')
            country.curConfirm = self.getOrElse(item, 'curConfirm')
            country.confirmedRelative = self.getOrElse(item, 'confirmedRelative')
            country.pub_date = self.pub_date
            all_foreign_countries.append(country)
        return all_foreign_countries

    # Produce objects from the parsed page content.
    def fetch_page_datas(self):
        """Return the parsed ForeignCountry list."""
        return self.parse_outside_daily_datas()

    # Main workflow: fetch, parse, persist.
    def process_data(self):
        """Fetch the page, parse it, and save the foreign daily data."""
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_countries = self.fetch_page_datas()
        # Persist to MySQL.
        self.db.save_outside_daily_datas(all_countries)
|
|
|
|
|
|
# 创建Dataservice对象
|
|
|
if __name__ == '__main__':
    # Run the full fetch-parse-store pipeline only when executed as a
    # script, so importing this module has no network/DB side effects.
    ds = DataService()
    ds.process_data()
|