Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
|
cd49025acb | 5 years ago |
|
3489845ae8 | 5 years ago |
|
b4128ef7eb | 5 years ago |
|
bae6447109 | 5 years ago |
|
ea4a3c13a7 | 5 years ago |
@ -1,79 +0,0 @@
|
||||
import pymysql
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import re
|
||||
|
||||
import pymysql
|
||||
def getData(DB):
    """Print the first row of the ``cn`` table.

    Args:
        DB: Open DB-API connection; ``save_summary`` passes a pymysql
            connection.

    Returns:
        None. The fetched row (or ``None`` when nothing is returned by the
        query) is written to stdout.
    """
    cs = DB.cursor()
    try:
        cs.execute('select * from cn')
        print(cs.fetchone())
    finally:
        # Release the cursor even when the query fails.
        cs.close()
|
||||
|
||||
def delData(DB, day):
    """Delete the rows of ``cn`` whose ``date`` starts with the given day.

    Args:
        DB: Open DB-API connection; the delete is committed on it.
        day: Timestamp string. Its first 11 characters form the LIKE prefix
            (presumably "YYYY.MM.DD " including the trailing space - confirm
            against the scraped ``mapLastUpdatedTime`` format).

    Failures are best-effort: they are reported on stdout and the
    transaction is rolled back; no exception is raised to the caller.
    """
    prefix = day[:11] + '%'
    sql = 'delete from cn where date like %s'
    cs = DB.cursor()
    try:
        # One statement with one parameter: execute() with a 1-tuple, not
        # executemany() with a bare string.
        cs.execute(sql, (prefix,))
        DB.commit()
    except Exception as e:
        print('删除不成功')
        print(e)
        # Do not leave a half-finished transaction open on the connection.
        DB.rollback()
    finally:
        cs.close()
|
||||
|
||||
def fun1(summary):
    """Return the instance attribute values of ``summary`` as a list.

    The values come out in the order the attributes are stored in the
    object's ``__dict__``.
    """
    return list(vars(summary).values())
|
||||
|
||||
def save_summary(host, user, password, database):
    """Scrape the national summary and store it as a row of table ``cn``.

    The rows already stored for the scraped day are removed through
    ``delData`` before the new row is inserted. Afterwards ``getData``
    prints the first row of the table.

    Args:
        host: MySQL server host.
        user: MySQL user name.
        password: MySQL password.
        database: Name of the database holding table ``cn``.

    Insert failures are reported on stdout and rolled back, not raised.
    """
    # Keyword arguments: PyMySQL 1.0+ rejects positional connect() parameters.
    db = pymysql.connect(host=host, user=user, password=password, database=database)
    try:
        cs = db.cursor()
        try:
            summary, updated_at = Mod_Summary_CN()

            # The first 18 attribute values of the summary feed the 18 metric
            # columns; the update time goes into ``date``.
            # NOTE(review): this relies on the attribute order of Summary
            # matching the column order below - confirm against Summary.
            row = tuple(fun1(summary)[:18]) + (updated_at,)

            sql = (
                'insert into cn(confirmed, confirmed_R, curConfirm, curConfirm_R, '
                'died, died_R,cured, cured_R, asymptomatic, asymptomatic_R, '
                'unconfirmed, unconfirmed_R, icu, icu_R, overseasIn, overseasIn_R, '
                'unOverseasInC, unOverseasInA, date)'
                'values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, '
                '%s, %s, %s, %s, %s, %s, %s, %s, %s)'
            )

            try:
                delData(db, updated_at)
                cs.execute(sql, row)
            except Exception as e:
                print('添加不成功')
                print(e)
                db.rollback()
            else:
                db.commit()
                print('添加成功')

            getData(db)
        finally:
            cs.close()
    finally:
        # Always release the connection, even when scraping or SQL fails.
        db.close()
|
||||
|
||||
|
@ -1,50 +0,0 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def Mod_Summary_CN():
    """Scrape the national summary figures from the Baidu epidemic page.

    Returns:
        tuple: ``(summary, time)`` where ``summary`` is a ``Summary`` object
        with the integer figures filled in and ``time`` is the value of the
        page's ``mapLastUpdatedTime`` field as a string.
    """
    response = requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/')
    soup = BeautifulSoup(response.text, 'html.parser')

    tagstr = str(soup.find('script', attrs={'id': 'captain-config'}))
    res = re.findall(r'(\{".*?".*?\})', tagstr)

    # NOTE(review): 831 is a hard-coded position among the regex matches and
    # depends on the exact page payload - confirm it still selects the
    # summary object.
    tagJson = json.loads(res[831])

    temp1 = re.findall(r'("mapLastUpdatedTime":".*?")', tagstr)
    temp2 = str(temp1[0])
    # Drop the 22-character '"mapLastUpdatedTime":"' prefix and the closing quote.
    time = temp2[22:-1]

    summary = Summary()
    # Assignment order is kept as in the original listing.
    for field in (
        'confirmed', 'confirmedRelative',
        'curConfirm', 'curConfirmRelative',
        'died', 'diedRelative',
        'cured', 'curedRelative',
        'asymptomatic', 'asymptomaticRelative',
        'unconfirmed', 'unconfirmedRelative',
        'icu', 'icuRelative',
        'overseasInput', 'overseasInputRelative',
        'unOverseasInputCumulative', 'unOverseasInputNewAdd',
        'icuDisable',
    ):
        setattr(summary, field, int(tagJson[field]))

    # The original fell off the end and returned None, although the caller
    # unpacks two values from the result.
    return summary, time
|
||||
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,98 @@
|
||||
import requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
|
||||
class DataService:
    """Fetch Baidu's epidemic page, parse it and store province/city data.

    ``process_data`` runs the whole pipeline: download the page, parse the
    JSON embedded in it, build ``Province``/``City`` objects and hand them
    to ``MyDB`` for saving.
    """

    # requests applies no timeout by default; without one a stalled server
    # would block the run indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        # NOTE(review): connection settings, including the password, are
        # hard-coded here - consider moving them to configuration.
        self.db = MyDB('localhost', 'root', 'lujian123', 'covid19_datas_guangxi')
        # Both are filled in by parse_target_page().
        self.pub_date = ''
        self.insideDatas = []

    # Fetch the web page
    def fetch_html_page(self):
        """Download ``self.url`` and return the body decoded as UTF-8."""
        res = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        return res.content.decode('utf-8')

    # Parse the web page
    def parse_target_page(self, html):
        """Extract the update time and the per-province list from ``html``.

        Stores the results in ``self.pub_date`` and ``self.insideDatas``.
        """
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        tagDict = json.loads(tag.string)
        first_component = tagDict['component'][0]

        # Time of the last data update
        self.pub_date = first_component['mapLastUpdatedTime']

        # Data of every province and its cities: [dict, dict, ...]
        self.insideDatas = first_component['caseList']

    # Normalise a string
    def process_str(self, s):
        """Return ``'0'`` for ``None`` or an empty string, else ``s`` itself."""
        if s is None or s == '':
            return '0'
        return s

    def _to_int(self, value):
        """Convert a scraped figure to int, treating ``None``/``''`` as 0."""
        return int(self.process_str(value))

    # Extract the data of every province
    def fetch_province_datas(self):
        """Build ``Province`` objects (with their cities) from the parsed data.

        Returns:
            list: one ``Province`` per entry of ``self.insideDatas``.

        Raises:
            KeyError: if a record lacks a key that is read without a default.
        """
        all_provinces = []
        for item in self.insideDatas:
            # item: dict
            province = Province()
            province.confirmed = self._to_int(item['confirmed'])
            province.died = self._to_int(item.get('died', '0'))
            # 'crued' (sic) is the spelling used as key and attribute name.
            province.crued = self._to_int(item['crued'])
            province.relativeTime = self._to_int(item['relativeTime'])
            province.confirmedRelative = self._to_int(item['confirmedRelative'])
            province.diedRelative = self._to_int(item['diedRelative'])
            province.curedRelative = self._to_int(item['curedRelative'])
            province.asymptomaticRelative = self._to_int(item.get('asymptomaticRelative', '0'))
            province.asymptomatic = self._to_int(item.get('asymptomatic', '0'))
            province.curConfirm = self._to_int(item['curConfirm'])
            province.curConfirmRelative = self._to_int(item['curConfirmRelative'])
            province.icuDisable = self._to_int(item['icuDisable'])
            province.area = item['area']
            province.pub_date = self.pub_date

            # Extract the data of every city: subList is [dict, dict, ...]
            for cityItem in item['subList']:
                city = City()
                city.city = cityItem['city']
                city.confirmed = self._to_int(cityItem['confirmed'])
                city.died = self._to_int(cityItem.get('died', '0'))
                city.crued = self._to_int(cityItem['crued'])
                city.confirmedRelative = self._to_int(cityItem['confirmedRelative'])
                city.curConfirm = self._to_int(cityItem.get('curConfirm', '0'))
                city.pub_date = self.pub_date
                city.province = province.area
                province.subList.append(city)

            all_provinces.append(province)
        return all_provinces

    # Turn the parsed content into objects
    def fetch_page_datas(self):
        """Return the list produced by ``fetch_province_datas``."""
        return self.fetch_province_datas()

    # Business entry point
    def process_data(self):
        """Download, parse and save the province and city data."""
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_provinces = self.fetch_page_datas()

        # Save province data
        self.db.save_province_datas(all_provinces)
        # Save city data
        self.db.save_city_datas(all_provinces)
|
||||
|
||||
# Create the DataService object and run its process_data() pipeline.
|
||||
ds = DataService()
|
||||
ds.process_data()
|
@ -1,120 +0,0 @@
|
||||
class Province:
    """Epidemic figures of one province together with its list of cities."""

    def __init__(self):
        # Cumulative counters.
        self.confirmed = self.died = self.crued = 0
        # Day-over-day changes.
        self.confirmedRelative = self.diedRelative = self.curedRelative = 0
        self.asymptomaticRelative = self.asymptomatic = 0
        self.curConfirm = self.curConfirmRelative = 0
        self.area = self.pub_date = ''
        # Filled by the caller with the province's city objects.
        self.subList = []

    def __str__(self):
        """Render the figures as a multi-line report with signed deltas."""
        layout = ''.join((
            '%s\n',
            '省份: %s\n',
            '累计确诊: %s(较昨日: %+d)\n',
            '累计死亡: %d(较昨日: %+d)\n',
            '累计治愈: %d(较昨日: %+d)\n',
            '现有确诊: %d(较昨日: %+d) \n',
            '无症状感染者: %d(较昨日: %+d)\n',
        ))
        values = (
            self.pub_date, self.area,
            self.confirmed, self.confirmedRelative,
            self.died, self.diedRelative,
            self.crued, self.curedRelative,
            self.curConfirm, self.curConfirmRelative,
            self.asymptomatic, self.asymptomaticRelative,
        )
        return layout % values
|
||||
|
||||
class City:
    """Epidemic figures of one city."""

    def __init__(self):
        self.city = ''
        self.confirmed = self.died = self.crued = 0
        # confirmedRelative is not initialised here; the original assignment
        # was left commented out.
        self.curConfirm = 0

    def __str__(self):
        """Render the city name followed by its four counters."""
        layout = '\n'.join((
            '%s',
            '累计确诊: %d',
            '累计死亡: %d',
            '累计治愈: %d',
            '现存确诊: %d',
        ))
        return layout % (self.city, self.confirmed, self.died,
                         self.crued, self.curConfirm)
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import json
|
||||
|
||||
def getOrElse(a, key):
    """Return ``a[key]``, or ``''`` when the key is missing or maps to None.

    Args:
        a: Mapping to read from (anything providing ``get``).
        key: Key to look up.
    """
    value = a.get(key)
    # Identity test for None; falsy values such as 0 are returned unchanged.
    return '' if value is None else value
|
||||
|
||||
def processStr(s):
    """Return ``'0'`` when ``s`` is None or an empty string, else ``s``.

    Lets callers pass the result straight to ``int()`` for missing values.
    """
    if s is None or s == '':
        return '0'
    return s
|
||||
|
||||
def Print(a):
    """Print every province in ``a`` followed by its cities.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation was stripped - a blank line after each city and a closing
    rule after each province; confirm against the original file.
    """
    dashed_rule = '- - - - - - - - - - - - - - - - -' * 4
    solid_rule = '---------------------------------' * 4
    for province in a:
        print(province)
        cities = province.subList
        if len(cities) != 0:
            # Heading shown only for provinces that have city entries.
            print(dashed_rule)
            print('省内各地级市:\n')
        for city in cities:
            print(city)
            print()
        print(solid_rule)
|
||||
|
||||
|
||||
# Download the page and keep its text.
res = requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/').text

soup = BeautifulSoup(res, 'html.parser')

# The data is JSON embedded in the <script id="captain-config"> element.
tag = soup.find('script', attrs={'id': 'captain-config'})
tagstr = tag.string
tagjson = json.loads(tagstr)

dateinfos = tagjson['component']
insidedateinfos = dateinfos[0]['caseList']
pub_date = dateinfos[0]['mapLastUpdatedTime']

all_provinces = []
for item in insidedateinfos:
    province = Province()
    # Missing or empty figures become 0 via getOrElse/processStr.
    for field in (
        'confirmed', 'died', 'crued',
        'confirmedRelative', 'diedRelative', 'curedRelative',
        'asymptomaticRelative', 'asymptomatic',
        'curConfirm', 'curConfirmRelative',
    ):
        setattr(province, field, int(processStr(getOrElse(item, field))))
    province.pub_date = pub_date
    province.area = item['area']

    sublist = item['subList']
    for cityitem in sublist:
        city = City()
        city.city = getOrElse(cityitem, 'city')
        for field in ('confirmed', 'died', 'crued', 'confirmedRelative', 'curConfirm'):
            setattr(city, field, int(processStr(getOrElse(cityitem, field))))
        province.subList.append(city)

    all_provinces.append(province)

Print(all_provinces)
|
||||
|
@ -0,0 +1,12 @@
|
||||
-- Daily epidemic data for domestic cities.
CREATE TABLE city_daily_datas
(
    Id                INT           NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '编号',
    city              VARCHAR(4000) COMMENT '城市名称',
    confirmedRelative INT           COMMENT '新增确诊',
    curConfirm        INT           COMMENT '现有确诊',
    confirmed         INT           COMMENT '累计确诊',
    died              INT           COMMENT '累计死亡',
    crued             INT           COMMENT '累计治愈出院',
    pub_date          VARCHAR(4000) COMMENT '发布日期',
    province          VARCHAR(4000) COMMENT '所属省份'
) COMMENT '各国内城市每日疫情数据';
|
@ -0,0 +1,73 @@
|
||||
import pymysql
|
||||
|
||||
class MyDB:
    """Persistence helper for the province and city daily tables.

    Holds one pymysql connection and one cursor for its whole lifetime.
    """

    def __init__(self, host, user, passwd, db):
        """Open the connection.

        Args:
            host: MySQL server host.
            user: MySQL user name.
            passwd: MySQL password.
            db: Database name.
        """
        # Set first so __del__ stays safe when connect() raises.
        self.conn = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ rejects positional connect() parameters.
        self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
        self.cursor = self.conn.cursor()

    def get_province_list_tuple(self, all_provinces):
        """Return one ``get_info_tuple()`` row per province."""
        return [province.get_info_tuple() for province in all_provinces]

    def get_city_list_tuple(self, all_provinces=None):
        """Return the city rows of all given provinces as one flat list.

        The original read ``self.subList``, which this class never defines,
        and built rows without ``curConfirm``. The rows now come from each
        province's ``get_city_info_tuple()``, matching the insert statement
        in ``save_city_datas``. Without an argument the result is empty.
        """
        rows = []
        for province in all_provinces or ():
            rows.extend(province.get_city_info_tuple())
        return rows

    def _replace_day(self, table, insert_sql, pub_date, rows):
        """Swap the rows of ``pub_date``'s day in ``table`` for ``rows``.

        Delete and insert share one transaction, so a failed insert no
        longer leaves the day without data. Errors are printed and rolled
        back, not raised.
        """
        # The first 10 characters of pub_date select the day (LIKE prefix).
        day_prefix = pub_date[:10] + '%'
        # ``table`` is a constant supplied by this class; the scraped value
        # is passed as a bound parameter instead of being formatted in.
        delete_sql = 'delete from %s where pub_date like %%s' % table
        try:
            self.cursor.execute(delete_sql, (day_prefix,))
            self.cursor.executemany(insert_sql, rows)
            self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()

    # Save province data
    def save_province_datas(self, all_provinces):
        """Store today's province rows, replacing any stored for that day.

        Does nothing when ``all_provinces`` is empty.
        """
        if not all_provinces:
            return
        sql = ('insert into province_daily_datas(curConfirm,curConfirmRelative,'
               'confirmed,confirmedRelative,died,diedRelative,crued,curedRelative,'
               'area,asymptomatic,asymptomaticRelative,pub_date) '
               'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
        res = self.get_province_list_tuple(all_provinces)

        print('+++ save_province_datas, data len: %d' % len(res))
        self._replace_day('province_daily_datas', sql, all_provinces[0].pub_date, res)
        print('+++ save_province_datas is over.')

    # Save city data
    def save_city_datas(self, all_provinces):
        """Store today's city rows, replacing any stored for that day.

        Does nothing when ``all_provinces`` is empty.
        """
        if not all_provinces:
            return
        sql = ('insert into city_daily_datas(city,confirmedRelative,curConfirm,'
               'confirmed,died,crued,pub_date,province) '
               'values(%s,%s,%s,%s,%s,%s,%s,%s)')
        res = self.get_city_list_tuple(all_provinces)

        print('+++ save_city_datas, data len: %d' % len(res))
        self._replace_day('city_daily_datas', sql, all_provinces[0].pub_date, res)
        print('+++ save_city_datas is over.')

    def __del__(self):
        # getattr: the attribute is absent on instances that never ran __init__.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()
|
@ -0,0 +1,52 @@
|
||||
# Prefecture-level city entity
class City:
    """Figures of one city plus its publication date and province name."""

    def __init__(self):
        self.city = ''
        self.confirmed = self.died = self.crued = 0
        self.confirmedRelative = self.curConfirm = 0
        self.pub_date = self.province = ''

    def __str__(self):
        """Return the fields as comma-separated ``name: value`` pairs."""
        pieces = (
            'city: %s' % self.city,
            'confirmed: %d' % self.confirmed,
            'died: %d' % self.died,
            'crued: %d' % self.crued,
            'confirmedRelative: %d' % self.confirmedRelative,
            'curConfirm: %d' % self.curConfirm,
            'pub_date:%s' % self.pub_date,
        )
        return ','.join(pieces)
|
||||
|
||||
|
||||
|
||||
# Domestic province epidemic entity
class Province:
    """Figures of one province and the ``City`` objects that belong to it."""

    def __init__(self):
        self.confirmed = self.died = self.crued = 0
        self.relativeTime = 0
        self.confirmedRelative = self.diedRelative = self.curedRelative = 0
        self.asymptomaticRelative = self.asymptomatic = 0
        self.curConfirm = self.curConfirmRelative = 0
        self.icuDisable = 0
        self.area = self.pub_date = ''
        self.subList = []

    def __str__(self):
        """Return the fields as ``name: value`` pairs separated by ``', '``.

        ``asymptomatic`` is not part of the output.
        """
        pieces = (
            'area: %s' % self.area,
            'confirmed:%s' % self.confirmed,
            'died: %d' % self.died,
            'crued: %d' % self.crued,
            'relativeTime: %d' % self.relativeTime,
            'confirmedRelative: %d' % self.confirmedRelative,
            'diedRelative: %d' % self.diedRelative,
            'curedRelative: %d' % self.curedRelative,
            'asymptomaticRelative: %d' % self.asymptomaticRelative,
            'curConfirm: %d' % self.curConfirm,
            'curConfirmRelative: %d' % self.curConfirmRelative,
            'icuDisable: %d' % self.icuDisable,
            'pub_date: %s' % self.pub_date,
        )
        return ', '.join(pieces)

    def get_info_tuple(self):
        """Return the province figures as a 12-item tuple.

        The order matches the column list of the province insert statement.
        """
        row = (
            self.curConfirm, self.curConfirmRelative,
            self.confirmed, self.confirmedRelative,
            self.died, self.diedRelative,
            self.crued, self.curedRelative,
            self.area,
            self.asymptomatic, self.asymptomaticRelative,
            self.pub_date,
        )
        return row

    def get_city_info_tuple(self):
        """Return one 8-item tuple per city in ``self.subList``."""
        return [
            (c.city, c.confirmedRelative, c.curConfirm, c.confirmed,
             c.died, c.crued, c.pub_date, c.province)
            for c in self.subList
        ]
|
||||
|
@ -0,0 +1,16 @@
|
||||
-- Daily epidemic data for each province.
CREATE TABLE province_daily_datas
(
    Id                   INT           NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '编号',
    curConfirm           INT           COMMENT '现有确诊',
    curConfirmRelative   INT           COMMENT '较昨日现有确诊新增',
    confirmed            INT           COMMENT '累计确诊',
    confirmedRelative    INT           COMMENT '新增确诊',
    died                 INT           COMMENT '累计死亡',
    diedRelative         INT           COMMENT '较昨日累计死亡新增',
    crued                INT           COMMENT '累计治愈出院',
    curedRelative        INT           COMMENT '较昨日累计治愈新增',
    area                 VARCHAR(4000) COMMENT '省份名称',
    asymptomatic         INT           COMMENT '无症状感染者数',
    asymptomaticRelative INT           COMMENT '较昨日无症状感染新增',
    pub_date             VARCHAR(4000) COMMENT '发布日期'
) COMMENT '各省份每日疫情数据';
|
Loading…
Reference in new issue