diff --git a/数据存储/.idea/workspace.xml b/数据存储/.idea/workspace.xml index 9e3a45f..f59b7ec 100644 --- a/数据存储/.idea/workspace.xml +++ b/数据存储/.idea/workspace.xml @@ -3,7 +3,6 @@ - @@ -80,22 +79,26 @@ - + - - + + - - + + - - + + - + + + + + diff --git a/数据存储/GrapData.py b/数据存储/GrapData.py index 63eda0b..7116a1a 100644 --- a/数据存储/GrapData.py +++ b/数据存储/GrapData.py @@ -66,22 +66,10 @@ class Spider(): citysinfo.city=cj['city'] citysinfo.confirmed=int(cj['confirmed']) #需要对缺失数据进行处理 - if cj['died']=='': - citysinfo.died=0 - else: - citysinfo.died=int(cj['died']) - if cj['crued']=='': - citysinfo.cured=0 - else: - citysinfo.cured=int(cj['crued']) - citysinfo.confirmedRelative=int(cj['confirmedRelative']) - try: - if cj['curConfirm']=='': - citysinfo.curConfirm=0 - else: - citysinfo.curConfirm=int(cj['curConfirm']) - except: - citysinfo.curConfirm = 0 + citysinfo.died=int(self.dealData(cj,'died')) + citysinfo.cured=int(self.dealData(cj,'crued')) + citysinfo.confirmedRelative=int(self.dealData(cj,'confirmedRelative')) + citysinfo.curConfirm=int(self.dealData(cj,'curConfirm')) province.subList.append(citysinfo) ctuple=(citysinfo.city,province.area,citysinfo.confirmed,citysinfo.died,citysinfo.cured,citysinfo.confirmed, citysinfo.curConfirm,province.pub_date) @@ -122,8 +110,6 @@ class Spider(): foreignTuples.append(ftuples) return foreignTuples - - #抓取国外疫情信息概况 def grapSummaryForeign(self): data=requests.get(self.url,headers=self.headers) @@ -152,7 +138,6 @@ class Spider(): Outside.cured,Outside.curedRelative,Outside.died,Outside.diedRelative, Outside.updatedTime) - #抓取国内疫情信息概况 def grapSummaryChina(self): data=requests.get(self.url,headers=self.headers) diff --git a/数据存储/Grapmain.py b/数据存储/Grapmain.py index f2029dc..7f819e9 100644 --- a/数据存储/Grapmain.py +++ b/数据存储/Grapmain.py @@ -1,16 +1,24 @@ from GrapData import Spider,SaveToDB -def main(): +#抓取所有数据填入数据库 +def updateDB(): spider=Spider() + saveDb=SaveToDB() spider.url='https://voice.baidu.com/act/newpneumonia/newpneumonia/' - #spider.grapProvince() - savedb=SaveToDB() - #foreignData=spider.grapForeign() - #国内疫情概述 - #InsideData=spider.grapSummaryChina() - #国外疫情汇总 + proData,cityData=spider.grapProvince() + saveDb.InsertIntoProvince(proData) + saveDb.InsertToCity(cityData) + foreignData = spider.grapForeign() + saveDb.InsertForeignCountry(foreignData) + InsideData=spider.grapSummaryChina() + saveDb.InsertSummaryChina(InsideData) OutsideData=spider.grapSummaryForeign() - savedb.InsertSummaryForeign(OutsideData) + saveDb.InsertSummaryForeign(OutsideData) + + + +def main(): + updateDB() if __name__ == '__main__': main() \ No newline at end of file diff --git a/数据存储/__pycache__/DataFrame.cpython-36.pyc b/数据存储/__pycache__/DataFrame.cpython-36.pyc index b81aae0..db69851 100644 Binary files a/数据存储/__pycache__/DataFrame.cpython-36.pyc and b/数据存储/__pycache__/DataFrame.cpython-36.pyc differ diff --git a/数据存储/__pycache__/GrapData.cpython-36.pyc b/数据存储/__pycache__/GrapData.cpython-36.pyc index 14c5f23..58fad85 100644 Binary files a/数据存储/__pycache__/GrapData.cpython-36.pyc and b/数据存储/__pycache__/GrapData.cpython-36.pyc differ diff --git a/数据存储/xinguan.sql b/数据存储/xinguan.sql index 1bc349e..e29a84d 100644 --- a/数据存储/xinguan.sql +++ b/数据存储/xinguan.sql @@ -10,7 +10,7 @@ create table ForeignCountry( country varchar(30) unique, curConfirm int, confirmedRelative int, - pub_data DATETIME not null + pub_date DATETIME not null ); create table InsideChina( @@ -56,7 +56,6 @@ create table CityTable( confirmedRelative int, curConfirm int, pub_date DATETIME, - foreign key(provinceName) references ProvinceTable(area) ); create table ProvinceTable( @@ -70,8 +69,9 @@ create table ProvinceTable( asymptomatic int, asymptomaticRelative int, curConfirm int, + curConfirmRelative int, icuDisable int, - area varchar(10) unique, + area varchar(10), pub_date DATETIME not null );