diff --git a/数据存储/.idea/workspace.xml b/数据存储/.idea/workspace.xml index d0a892f..9e3a45f 100644 --- a/数据存储/.idea/workspace.xml +++ b/数据存储/.idea/workspace.xml @@ -2,8 +2,10 @@ - - + + + + - + - - + + - - + + - - + + - + diff --git a/数据存储/DataFrame.py b/数据存储/DataFrame.py index 9c651c0..fb00969 100644 --- a/数据存储/DataFrame.py +++ b/数据存储/DataFrame.py @@ -139,6 +139,7 @@ class SaveToDB(): curser.close() self.conn.close() + #存储国内各省疫情信息 def InsertIntoProvince(self,data): #param data list(tuple),每个元组即一个省的信息 #首先删掉数据库中当日的数据 @@ -156,6 +157,7 @@ class SaveToDB(): cursor.execute(sql) cursor.close() + #存储国内城市的疫情信息 def InsertToCity(self,data): #首先删掉数据库中当日的数据 cursor=self.conn.cursor() @@ -171,6 +173,7 @@ class SaveToDB(): cursor.execute(sql) cursor.close() + #存储外国所有国家疫情信息 def InsertForeignCountry(self,data): #首先删掉数据库中当日的数据 cursor=self.conn.cursor() @@ -189,6 +192,39 @@ class SaveToDB(): pass cursor.close() + #存储国内疫情汇总 + def InsertSummaryChina(self,data): + # 首先删掉数据库中当日的数据 + cursor = self.conn.cursor() + insertTime = data[-1] + # 删掉同一天的数据 + deletesql = 'delete from InsideChina where Date(updateTime)=Date("%s");' % (insertTime) + cursor.execute(deletesql) + # 设置ID从最小开始 + self.__rule('InsideChina') + sql = 'insert into InsideChina(curConfirm,curConfirmRelative,asymptomatic,asymptomaticRelative,' \ + 'unconfirmed,unconfirmedRelative,icu,icuRelative,confirmed,confirmedRelative,' \ + 'overseasInput,overseasInputRelative,cured,curedRelative,died,diedRelative,updateTime) ' \ + 'values(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,"%s");' % (data) + cursor.execute(sql) + cursor.close() + + #存储国外疫情汇总 + def InsertSummaryForeign(self,data): + # 首先删掉数据库中当日的数据 + cursor = self.conn.cursor() + insertTime = data[-1] + # 删掉同一天的数据 + deletesql = 'delete from OutsideChina where Date(updateTime)=Date("%s");' % (insertTime) + cursor.execute(deletesql) + # 设置ID从最小开始 + self.__rule('OutsideChina') + sql = 'insert into OutsideChina(confirmed,curConfirmed,confirmedRelative,' \ + 'cured,curedRelative,died,diedRelative,updateTime) ' \ + 'values(%d,%d,%d,%d,%d,%d,%d,"%s");' % (data) + cursor.execute(sql) + cursor.close() + #删除某些元组后ID可能出现中断,手动调整ID连续从最小ID开始 diff --git a/数据存储/GrapData.py b/数据存储/GrapData.py index d133b63..63eda0b 100644 --- a/数据存储/GrapData.py +++ b/数据存储/GrapData.py @@ -148,6 +148,9 @@ class Spider(): Outside.diedRelative=int(DataOut['diedRelative']) Outside.updatedTime=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) print(Outside) + return (Outside.confirmed,Outside.curConfirm,Outside.confirmedRelative, + Outside.cured,Outside.curedRelative,Outside.died,Outside.diedRelative, + Outside.updatedTime) #抓取国内疫情信息概况 @@ -183,6 +186,11 @@ class Spider(): Inside.curConfirmRelative=int(DataIn['curConfirmRelative']) Inside.updatedTime=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) print(Inside) + return (Inside.curConfirm,Inside.curConfirmRelative,Inside.asymptomatic,Inside.asymptomaticRelative, + Inside.unconfirmed,Inside.unconfirmedRelative,Inside.icu,Inside.icuRelative,Inside.confirmed, + Inside.confirmedRelative,Inside.overseasInput,Inside.overseasInputRelative,Inside.cured, + Inside.curConfirmRelative,Inside.died,Inside.diedRelative,Inside.updatedTime) + def dealData(self,a,b): diff --git a/数据存储/Grapmain.py b/数据存储/Grapmain.py index aeb7c80..f2029dc 100644 --- a/数据存储/Grapmain.py +++ b/数据存储/Grapmain.py @@ -3,10 +3,14 @@ from GrapData import Spider,SaveToDB def main(): spider=Spider() spider.url='https://voice.baidu.com/act/newpneumonia/newpneumonia/' - spider.grapProvince() + #spider.grapProvince() savedb=SaveToDB() - foreignData=spider.grapForeign() - savedb.InsertForeignCountry(foreignData) + #foreignData=spider.grapForeign() + #国内疫情概述 + #InsideData=spider.grapSummaryChina() + #国外疫情汇总 + OutsideData=spider.grapSummaryForeign() + savedb.InsertSummaryForeign(OutsideData) if __name__ == '__main__': main() \ No newline at end of file diff --git a/数据存储/__pycache__/DataFrame.cpython-36.pyc b/数据存储/__pycache__/DataFrame.cpython-36.pyc index 92bd349..b81aae0 100644 Binary files a/数据存储/__pycache__/DataFrame.cpython-36.pyc and b/数据存储/__pycache__/DataFrame.cpython-36.pyc differ diff --git a/数据存储/__pycache__/GrapData.cpython-36.pyc b/数据存储/__pycache__/GrapData.cpython-36.pyc index e830c00..14c5f23 100644 Binary files a/数据存储/__pycache__/GrapData.cpython-36.pyc and b/数据存储/__pycache__/GrapData.cpython-36.pyc differ