import time
import requests
from bs4 import BeautifulSoup
import re
import json
import datetime
import pymysql
def Inside(ds):
    """Crawl the per-province and per-city figures and store them in MySQL.

    The figures are read from the ``getAreaStat`` script tag of the
    ncov.dxy.cn pneumonia page.  Rows already stored for the current day are
    deleted from ``province_daily_datas`` / ``city_daily_datas`` before the
    fresh rows are inserted.

    Args:
        ds: Connection settings as a sequence ``[host, user, password, database]``.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the page does not contain the ``getAreaStat`` script tag.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    class Province:
        """One row of ``province_daily_datas`` plus the cities that belong to it."""

        def __init__(self):
            self.provinceName = ''
            self.provinceShortName = ''
            self.currentConfirmedCount = 0  # currently confirmed cases
            self.confirmedCount = 0  # cumulative confirmed cases
            self.suspectedCount = 0  # suspected cases
            self.curedCount = 0  # cumulative cured
            self.deadCount = 0  # cumulative deaths
            self.pub_time = datetime.datetime.now().strftime(time_format)  # capture time
            self.cities = []

        def __str__(self):
            return (
                'provinceName:%s provinceShortName:%s currentConfirmedCount:%d '
                'confirmedCount:%d suspectedCount:%d curedCount:%d deadCount:%d'
                % (self.provinceName, self.provinceShortName,
                   self.currentConfirmedCount, self.confirmedCount,
                   self.suspectedCount, self.curedCount, self.deadCount)
            )

        def get_info_tuple(self):
            """Return the values in the column order of the province INSERT."""
            return (self.provinceName, self.provinceShortName,
                    self.currentConfirmedCount, self.confirmedCount,
                    self.suspectedCount, self.curedCount, self.deadCount,
                    self.pub_time)

    class City:
        """One row of ``city_daily_datas``."""

        def __init__(self):
            self.cityName = ''
            self.currentConfirmedCount = 0
            self.confirmedCount = 0
            self.suspectedCount = 0
            self.curedCount = 0
            self.deadCount = 0
            self.locationId = 0
            self.province = ''
            self.pub_time = datetime.datetime.now().strftime(time_format)  # capture time

        def __str__(self):
            return (
                'cityName:%s, currentConfirmedCount:%d, confirmedCount:%d, '
                'suspectedCount:%d, curedCount:%d, deadCount:%d, locationId:%d, '
                'pub_time:%s, province:%s'
                % (self.cityName, self.currentConfirmedCount, self.confirmedCount,
                   self.suspectedCount, self.curedCount, self.deadCount,
                   self.locationId, self.pub_time, self.province)
            )

        def get_info_tuple(self):
            """Return the values in the column order of the city INSERT."""
            return (self.cityName, self.currentConfirmedCount, self.confirmedCount,
                    self.suspectedCount, self.curedCount, self.deadCount,
                    self.locationId, self.province, self.pub_time)

    class MyDB:
        """Owns one pymysql connection and writes the crawl result through it."""

        def __init__(self, host, user, passwd, db):
            # Set first so close()/__del__ stay safe when connect() raises.
            self.conn = None
            # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
            self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
            self.cursor = self.conn.cursor()

        def get_province_list_tuple(self, all_province):
            return [item.get_info_tuple() for item in all_province]

        def get_city_list_tuple(self, all_city):
            return [item.get_info_tuple() for item in all_city]

        # Save the province data.
        def save_province_datas(self, all_province):
            """Replace today's province rows with ``all_province``.

            Database errors are printed, not raised (best effort).
            """
            pattern = datetime.datetime.now().strftime('%Y-%m-%d') + '%'
            print('delete from province_daily_datas where pub_time like "%s"' % pattern)
            try:
                # The pattern is bound as a parameter instead of being pasted into the SQL.
                self.cursor.execute(
                    'delete from province_daily_datas where pub_time like %s', (pattern,))
                self.conn.commit()
                print("之前省份删除成功")
            except Exception as a:
                print(a)
            sql = ('insert into province_daily_datas(provinceName,provinceShortName,'
                   'currentConfirmedCount,confirmedCount,suspectedCount,curedCount,'
                   'deadCount,pub_time) values(%s,%s,%s,%s,%s,%s,%s,%s)')
            res = self.get_province_list_tuple(all_province)
            print("+++++++ save_province_datas, datas len: %d" % len(res))
            try:
                self.cursor.executemany(sql, res)
                self.conn.commit()
            except Exception as e:
                print(e)
                print("???")
            print("++++++++++++ save_province_datas is over")

        # Save the city data.
        def save_city_datas(self, all_city):
            """Replace today's city rows with ``all_city``.

            Database errors are printed, not raised (best effort).
            """
            pattern = datetime.datetime.now().strftime('%Y-%m-%d') + '%'
            try:
                self.cursor.execute(
                    'delete from city_daily_datas where pub_time like %s', (pattern,))
                print(pattern)
                self.conn.commit()
                print("之前城市删除成功")
            except Exception as a:
                print(a)
            sql = ('insert into city_daily_datas(cityName,currentConfirmedCount,'
                   'confirmedCount,suspectedCount,curedCount,deadCount,locationId,'
                   'province,pub_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)')
            res = self.get_city_list_tuple(all_city)
            print("+++++++ save_city_daily_datas, datas len: %d" % len(res))
            try:
                self.cursor.executemany(sql, res)
                self.conn.commit()
            except Exception as e:
                print(e)
                print("???")
            print("++++++++++++ save_city_daily_datas is over")

        def close(self):
            """Close the connection once; later calls are no-ops."""
            if self.conn is not None:
                self.conn.close()
                self.conn = None

        def __del__(self):
            self.close()

    class DataService:
        """Downloads the page, extracts the province records and stores them."""

        def __init__(self, ds):
            self.url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'
            self.results = []
            self.db = MyDB(host=ds[0], user=ds[1], passwd=ds[2], db=ds[3])

        # Download the page.
        def fetch_html_page(self):
            # A timeout keeps a stalled server from blocking the caller forever.
            res = requests.get(self.url, timeout=30)
            res.raise_for_status()
            return res.content.decode('utf-8')

        # Parse the page.
        def parse_html_page(self, html):
            """Store the JSON text of every province record in ``self.results``."""
            soup = BeautifulSoup(html, 'html.parser')
            tag = soup.find('script', attrs={'id': 'getAreaStat'})
            if tag is None or tag.string is None:
                raise ValueError('script tag "getAreaStat" not found in %s' % self.url)
            self.results = re.findall(r'\{"provinceName":.*?"cities":.*?\]\}', tag.string)

        # Extract every province together with its cities.
        def fetch_province_datas(self):
            """Return ``(all_province, all_city)`` built from ``self.results``."""
            all_province = []
            all_city = []
            for item in self.results:
                obj = json.loads(item)
                province = Province()
                province.provinceName = obj["provinceName"]
                province.provinceShortName = obj["provinceShortName"]
                province.currentConfirmedCount = obj["currentConfirmedCount"]
                province.confirmedCount = obj["confirmedCount"]
                province.suspectedCount = obj["suspectedCount"]
                province.curedCount = obj["curedCount"]
                province.deadCount = obj["deadCount"]
                # Extract the city data; each city remembers its province name.
                for cityItem in obj["cities"]:
                    city = City()
                    city.province = province.provinceName
                    city.cityName = cityItem["cityName"]
                    city.currentConfirmedCount = cityItem["currentConfirmedCount"]
                    city.confirmedCount = cityItem["confirmedCount"]
                    city.suspectedCount = cityItem["suspectedCount"]
                    city.curedCount = cityItem["curedCount"]
                    city.deadCount = cityItem["deadCount"]
                    city.locationId = cityItem["locationId"]
                    all_city.append(city)
                    province.cities.append(city)
                all_province.append(province)
            return all_province, all_city

        # Business entry point.
        def process_data(self):
            html = self.fetch_html_page()
            self.parse_html_page(html)
            all_province, all_city = self.fetch_province_datas()
            # Save the province data.
            self.db.save_province_datas(all_province)
            # Save the city data.
            self.db.save_city_datas(all_city)

    # Create the DataService object and run it; the connection is always closed.
    service = DataService(ds)
    try:
        service.process_data()
    finally:
        service.db.close()
def Outside(ds):
    """Crawl the per-country figures and store them in ``country_daily_datas``.

    The figures are read from the ``getListByCountryTypeService2true`` script
    tag of the ncov.dxy.cn pneumonia page.  Rows already stored for the current
    day are deleted before the fresh rows are inserted.

    Args:
        ds: Connection settings as a sequence ``[host, user, password, database]``.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the page does not contain the expected script tag.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    page_url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'

    class Country:
        """One row of ``country_daily_datas``."""

        def __init__(self):
            self.countryName = ''
            self.currentConfirmedCount = 0  # currently confirmed cases
            self.confirmedCount = 0  # cumulative confirmed cases
            self.confirmedCountRank = 0  # rank by cumulative confirmed cases
            self.curedCount = 0  # cumulative cured
            self.deadCount = 0  # cumulative deaths
            self.deadCountRank = 0  # rank by cumulative deaths
            self.deadRate = 0.0  # death rate
            self.deadRateRank = 0  # rank by death rate
            self.updatedTime = datetime.datetime.now().strftime(time_format)

        def get_info_tuple(self):
            """Return the values in the column order of the country INSERT."""
            return (self.countryName, self.currentConfirmedCount, self.confirmedCount,
                    self.confirmedCountRank, self.curedCount, self.deadCount,
                    self.deadCountRank, self.deadRate, self.deadRateRank,
                    self.updatedTime)

        def __str__(self):
            # deadRate is a float, so it is rendered with %s; %d would truncate it.
            return (
                'countryName:%s, currentConfirmedCount:%d, confirmedCount:%d, '
                'confirmedCountRank:%d, curedCount:%d, deadCount:%d, '
                'deadCountRank:%d, deadRate:%s, deadRateRank:%d, updatedTime:%s'
                % (self.countryName, self.currentConfirmedCount, self.confirmedCount,
                   self.confirmedCountRank, self.curedCount, self.deadCount,
                   self.deadCountRank, self.deadRate, self.deadRateRank,
                   self.updatedTime)
            )

    class MyDB:
        """Owns one pymysql connection and writes the country rows through it."""

        def __init__(self, host, user, passwd, db):
            # Set first so close()/__del__ stay safe when connect() raises.
            self.conn = None
            # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
            self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
            self.cursor = self.conn.cursor()

        def get_country_list_tuple(self, all_country):
            return [item.get_info_tuple() for item in all_country]

        # Save the data.
        def save_country_datas(self, all_country):
            """Replace today's country rows with ``all_country``.

            Database errors are printed, not raised (best effort).
            """
            pattern = datetime.datetime.now().strftime('%Y-%m-%d') + '%'
            try:
                # The pattern is bound as a parameter instead of being pasted into the SQL.
                self.cursor.execute(
                    'delete from country_daily_datas where pub_time like %s', (pattern,))
                self.conn.commit()
            except Exception as e:
                print(e)
            sql = ('insert into country_daily_datas(countryName,currentConfirmedCount,'
                   'confirmedCount,confirmedCountRank,curedCount,deadCount,'
                   'deadCountRank,deadRate,deadRateRank,pub_time) '
                   'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
            res = self.get_country_list_tuple(all_country)
            print('+++ save_country_datas, data len: %d' % len(res))
            try:
                self.cursor.executemany(sql, res)
                self.conn.commit()
            except Exception as e:
                print(e)
            print('+++ save_country_datas is over.')

        def show_country_datas(self):
            """Run ``select *`` on the table and return the fetched rows."""
            self.cursor.execute('select * from country_daily_datas')
            return self.cursor.fetchall()

        def close(self):
            """Close the connection once; later calls are no-ops."""
            if self.conn is not None:
                self.conn.close()
                self.conn = None

        def __del__(self):
            self.close()

    def forign_data_search(ds):
        """Download the page, build one ``Country`` per record and save them."""
        db = MyDB(host=ds[0], user=ds[1], passwd=ds[2], db=ds[3])
        try:
            # Download the page; the timeout keeps a stalled server from blocking forever.
            res = requests.get(page_url, timeout=30)
            res.raise_for_status()
            soup = BeautifulSoup(res.content.decode('utf-8'), 'html.parser')
            tag = soup.find('script', attrs={'id': 'getListByCountryTypeService2true'})
            if tag is None or tag.string is None:
                raise ValueError(
                    'script tag "getListByCountryTypeService2true" not found in %s' % page_url)
            # Each match is the JSON text of one country record.
            results = re.findall(r'\{"id".*?"showRank".*?\}', tag.string)
            all_country = []
            for item in results:
                obj = json.loads(item)
                country = Country()
                country.countryName = obj['provinceName']
                country.currentConfirmedCount = int(obj['currentConfirmedCount'])
                country.confirmedCount = int(obj['confirmedCount'])
                country.curedCount = int(obj['curedCount'])
                country.deadCount = int(obj['deadCount'])
                country.deadRate = float(obj['deadRate'])
                country.updatedTime = datetime.datetime.now().strftime(time_format)
                # All-or-nothing: if any rank key is absent, all three ranks are 0.
                try:
                    ranks = (int(obj['deadCountRank']),
                             int(obj['deadRateRank']),
                             int(obj['confirmedCountRank']))
                except KeyError:
                    ranks = (0, 0, 0)
                (country.deadCountRank,
                 country.deadRateRank,
                 country.confirmedCountRank) = ranks
                all_country.append(country)
            db.save_country_datas(all_country)
        finally:
            db.close()

    forign_data_search(ds)
def OutsideSummary(dsin):
    """Crawl the summary figures and store them in ``outsidesummary_realtime_datas``.

    The figures are the first ``{"currentConfirmedCount" ... "deadIncr" ...}``
    object in the ``getStatisticsService`` script tag of the ncov.dxy.cn
    pneumonia page (the original comments describe it as the overseas data).
    Rows already stored for the current day are deleted before the new row is
    inserted.

    Args:
        dsin: Connection settings as a sequence ``[host, user, password, database]``.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the script tag or the summary object is missing from the page.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    page_url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'

    class OutsideSummary:
        """One row of ``outsidesummary_realtime_datas``."""

        def __init__(self):
            self.currentConfirmedCount = 0
            self.confirmedCount = 0
            self.suspectedCount = 0
            self.curedCount = 0
            self.deadCount = 0
            self.suspectedIncr = 0
            self.currentConfirmedIncr = 0
            self.confirmedIncr = 0
            self.curedIncr = 0
            self.deadIncr = 0
            self.updatedTime = datetime.datetime.now().strftime(time_format)

        def get_info_tuple(self):
            """Return the values in the column order of the summary INSERT."""
            return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount,
                    self.curedCount, self.deadCount, self.suspectedIncr,
                    self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,
                    self.deadIncr, self.updatedTime)

        def __str__(self):
            return (
                'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, '
                'curedCount:%s, deadCount:%s, suspectedIncr:%s, '
                'currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, '
                'deadIncr:%s, updatedTime:%s' % self.get_info_tuple()
            )

    # Database entity class.
    class MyDB:
        """Owns one pymysql connection and writes the summary row through it."""

        def __init__(self, host, user, passwd, db):
            # Set first so close()/__del__ stay safe when connect() raises.
            self.conn = None
            # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
            self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
            self.cursor = self.conn.cursor()

        def get_outsideSummary_list_tuple(self, outsideSummary):
            return [outsideSummary.get_info_tuple()]

        # Save the data.
        def save_outsideSummary_datas(self, outsideSummary):
            """Replace today's summary row with ``outsideSummary``.

            A failing DELETE raises; a failing INSERT is printed (as before).
            """
            pattern = datetime.datetime.now().strftime('%Y-%m-%d') + '%'
            print('+++ [MyDB] delete from outsideSummary_realtime_datas')
            # The pattern is bound as a parameter instead of being pasted into the SQL.
            self.cursor.execute(
                'delete from outsidesummary_realtime_datas where updatedTime like %s',
                (pattern,))
            self.conn.commit()
            sql = ('insert into outsidesummary_realtime_datas(currentConfirmedCount,'
                   'confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,'
                   'currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr,updatedTime) '
                   'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
            res = self.get_outsideSummary_list_tuple(outsideSummary)
            print('+++ save_outsideSummary_datas, data len: %d' % len(res))
            try:
                # Exactly one row, so a plain execute() is enough.
                self.cursor.execute(sql, res[0])
                self.conn.commit()
            except Exception as e:
                print(e)
            print('+++ save_outsideSummary_datas is over.')

        def close(self):
            """Close the connection once; later calls are no-ops."""
            if self.conn is not None:
                self.conn.close()
                self.conn = None

        def __del__(self):
            self.close()

    # Business logic class.
    class DataService:
        """Holds the database handle built from the connection settings."""

        def __init__(self, ds):
            self.db = MyDB(host=ds[0], user=ds[1], passwd=ds[2], db=ds[3])

    # Download the page; the timeout keeps a stalled server from blocking forever.
    res = requests.get(page_url, timeout=30)
    res.raise_for_status()
    # Build the soup object and look up the statistics script tag.
    soup = BeautifulSoup(res.content.decode('utf-8'), 'html.parser')
    tag = soup.find('script', attrs={'id': 'getStatisticsService'})
    if tag is None or tag.string is None:
        raise ValueError('script tag "getStatisticsService" not found in %s' % page_url)
    # Find every candidate summary object with a regular expression.
    result = re.findall(r'\{"currentConfirmedCount".*?"deadIncr".*?\}', tag.string)
    if not result:
        raise ValueError('no summary object found in script tag "getStatisticsService"')
    obj = json.loads(result[0])

    def fetch_outside_summary(obj):
        """Copy the integer counters of ``obj`` into a new ``OutsideSummary``."""
        outsideSummary = OutsideSummary()
        for field in ('currentConfirmedCount', 'confirmedCount', 'suspectedCount',
                      'curedCount', 'deadCount', 'suspectedIncr',
                      'currentConfirmedIncr', 'confirmedIncr', 'curedIncr', 'deadIncr'):
            setattr(outsideSummary, field, int(obj[field]))
        outsideSummary.updatedTime = datetime.datetime.now().strftime(time_format)
        return outsideSummary

    # Create the DataService object, save the row and always close the connection.
    service = DataService(dsin)
    try:
        outsideSummary = fetch_outside_summary(obj)
        service.db.save_outsideSummary_datas(outsideSummary)
    finally:
        service.db.close()
def InsideSummary(ds):
    """Crawl the domestic summary figures and store them in ``home_realtime_datas``.

    The figures come from the ``view.inews.qq.com`` ``disease_h5`` endpoint.
    The feed is downloaded once; its ``lastUpdateTime`` is used both as the
    stored ``updatedTime`` and as the key for deleting the previously stored
    row with the same timestamp.

    Args:
        ds: Connection settings as a sequence ``[host, user, password, database]``.

    Raises:
        requests.RequestException: the feed could not be downloaded.
        KeyError: the feed lacks one of the expected fields.
    """

    class class_InsideSummary:
        """One row of ``home_realtime_datas``."""

        def __init__(self):
            self.curConfirm = 0  # currently confirmed
            self.curConfirmRelative = 0  # change in currently confirmed since yesterday
            self.asymptomatic = 0  # asymptomatic infections
            self.asymptomaticRelative = 0  # new asymptomatic infections since yesterday
            self.unconfirmed = 0  # currently suspected
            self.unconfirmedRelative = 0  # new suspected since yesterday
            self.icu = 0  # currently severe
            self.icuRelative = 0  # new severe cases since yesterday
            self.confirmed = 0  # cumulative confirmed
            self.confirmedRelative = 0  # new cumulative confirmed since yesterday
            self.overseasInput = 0  # cumulative imported cases
            self.overseasInputRelative = 0  # new imported cases since yesterday
            self.cured = 0  # cumulative cured
            self.curedRelative = 0  # new cured since yesterday
            self.died = 0  # cumulative deaths
            self.diedRelative = 0  # new deaths since yesterday
            self.updatedTime = 0  # publication time

        # Return the row as a tuple.
        def get_inside_summary_tuple(self):
            """Return the values in the column order of the summary INSERT."""
            return (self.curConfirm, self.curConfirmRelative, self.asymptomatic,
                    self.asymptomaticRelative, self.unconfirmed, self.unconfirmedRelative,
                    self.icu, self.icuRelative, self.confirmed, self.confirmedRelative,
                    self.overseasInput, self.overseasInputRelative, self.cured,
                    self.curedRelative, self.died, self.diedRelative, self.updatedTime)

        # Text rendering.
        def __str__(self):
            return ' '.join('%s' % value for value in self.get_inside_summary_tuple())

    def get_text():
        """Download the feed and return the decoded ``data`` object."""
        res = requests.get(
            'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5', timeout=30)
        res.raise_for_status()
        payload = json.loads(res.content.decode('utf-8'))
        # Normalise the data: backslashes are removed from string values before
        # the inner JSON is decoded.
        # NOTE(review): presumably a guard against a doubly escaped payload — confirm it is still needed.
        for key, value in payload.items():
            if isinstance(value, str):
                payload[key] = value.replace('\\', '')
        return json.loads(payload['data'])

    # Fill the summary object from the feed.
    def fetch_inside_summary(dataf):
        """Build a ``class_InsideSummary`` from the decoded feed ``dataf``."""
        total = dataf['chinaTotal']
        added = dataf['chinaAdd']
        insideSummary = class_InsideSummary()
        insideSummary.curConfirm = int(total['nowConfirm'])
        insideSummary.curConfirmRelative = int(added['nowConfirm'])
        insideSummary.asymptomatic = int(total['noInfect'])
        insideSummary.asymptomaticRelative = int(added['noInfect'])
        insideSummary.unconfirmed = int(total['suspect'])
        insideSummary.unconfirmedRelative = int(added['suspect'])
        insideSummary.icu = int(total['nowSevere'])
        insideSummary.icuRelative = int(added['nowSevere'])
        insideSummary.confirmed = int(total['confirm'])
        insideSummary.confirmedRelative = int(added['confirm'])
        insideSummary.overseasInput = int(total['importedCase'])
        insideSummary.overseasInputRelative = int(added['importedCase'])
        insideSummary.cured = int(total['heal'])
        insideSummary.curedRelative = int(added['heal'])
        insideSummary.died = int(total['dead'])
        insideSummary.diedRelative = int(added['dead'])
        insideSummary.updatedTime = dataf['lastUpdateTime']
        return insideSummary

    # Save the domestic summary data.
    def insert(res, date):
        """Delete the row stamped ``date`` and insert ``res`` (best effort, errors printed)."""
        # Use the caller's settings instead of credentials hard-coded here.
        conn = pymysql.connect(host=ds[0], user=ds[1], password=ds[2], database=ds[3])
        try:
            cursor = conn.cursor()
            try:
                # `date` comes from the remote feed, so it is bound as a parameter.
                cursor.execute(
                    'delete from home_realtime_datas where updatedTime like %s',
                    (date + '%',))
                conn.commit()
                print('delete old_insidesummary_datas successful')
            except Exception as e:
                print(e)
            sql = ('insert into home_realtime_datas(curConfirm,curConfirmRelative,'
                   'asymptomatic,asymptomaticRelative,unconfirmed,unconfirmedRelative,'
                   'icu,icuRelative,confirmed,confirmedRelative,overseasInput,'
                   'overseasInputRelative,cured,curedRelative,died,diedRelative,'
                   'updatedTime) '
                   'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
            try:
                cursor.execute(sql, res)
                conn.commit()
                print("+++ save_insidesummary_datas successful")
                print('+++ save_insidesummary_datas, data len: %d' % len(res))
            except Exception as e:
                print(e)
                print('+++ save_insidesummary_datas fail.')
            print('+++ save_insidesummary_datas is over.')
            cursor.close()
        finally:
            conn.close()

    # Download once so the DELETE key and the inserted row share one snapshot.
    data = get_text()
    insert(fetch_inside_summary(data).get_inside_summary_tuple(), data['lastUpdateTime'])
# Connection settings passed to every crawler: [host, user, password, database].
# NOTE(review): the credentials are hard-coded; consider loading them from the environment.
ds = ['localhost', 'root', 'kfq991122', 'covid19']


def run_all_crawlers(ds, jobs=None):
    """Run every crawler once and return the names of those that failed.

    A failing crawler is reported and skipped so the others still run and the
    hourly loop keeps going.

    Args:
        ds: Connection settings handed unchanged to each crawler.
        jobs: Callables taking ``ds``; defaults to ``Inside``,
            ``InsideSummary``, ``Outside`` and ``OutsideSummary`` in that order.

    Returns:
        A list with the ``__name__`` of every crawler that raised.
    """
    if jobs is None:
        jobs = (Inside, InsideSummary, Outside, OutsideSummary)
    failed = []
    for job in jobs:
        name = getattr(job, '__name__', repr(job))
        try:
            job(ds)
        except Exception as exc:  # top-level boundary: report and keep going
            print('%s failed: %r' % (name, exc))
            failed.append(name)
    return failed


if __name__ == '__main__':
    # Refresh all tables once per hour.
    while True:
        run_all_crawlers(ds)
        time.sleep(3600)