From c23a6852e13a62eac2964793ba8607d8475fe34e Mon Sep 17 00:00:00 2001 From: yangxudongll <1593744164@qq.com> Date: Thu, 6 Aug 2020 08:59:52 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E7=9C=81=E4=BB=BD?= =?UTF-8?q?=E8=A1=A8=E5=92=8C=E5=9F=8E=E5=B8=82=E8=A1=A8=E7=9A=84=E7=BB=93?= =?UTF-8?q?=E6=9E=84=EF=BC=8C=E8=A7=A3=E5=86=B3=E9=87=8D=E5=A4=8D=E7=9C=81?= =?UTF-8?q?=E4=BB=BD=E4=B8=8D=E8=83=BD=E6=8F=92=E5=85=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 数据存储/.idea/workspace.xml | 21 ++++++++------- 数据存储/GrapData.py | 23 +++-------------- 数据存储/Grapmain.py | 24 ++++++++++++------ .../__pycache__/DataFrame.cpython-36.pyc | Bin 8686 -> 8686 bytes .../__pycache__/GrapData.cpython-36.pyc | Bin 5754 -> 5653 bytes 数据存储/xinguan.sql | 6 ++--- 6 files changed, 35 insertions(+), 39 deletions(-) diff --git a/数据存储/.idea/workspace.xml b/数据存储/.idea/workspace.xml index 9e3a45f..f59b7ec 100644 --- a/数据存储/.idea/workspace.xml +++ b/数据存储/.idea/workspace.xml @@ -3,7 +3,6 @@ - @@ -80,22 +79,26 @@ - + - - + + - - + + - - + + - + + + + + diff --git a/数据存储/GrapData.py b/数据存储/GrapData.py index 63eda0b..7116a1a 100644 --- a/数据存储/GrapData.py +++ b/数据存储/GrapData.py @@ -66,22 +66,10 @@ class Spider(): citysinfo.city=cj['city'] citysinfo.confirmed=int(cj['confirmed']) #需要对缺失数据进行处理 - if cj['died']=='': - citysinfo.died=0 - else: - citysinfo.died=int(cj['died']) - if cj['crued']=='': - citysinfo.cured=0 - else: - citysinfo.cured=int(cj['crued']) - citysinfo.confirmedRelative=int(cj['confirmedRelative']) - try: - if cj['curConfirm']=='': - citysinfo.curConfirm=0 - else: - citysinfo.curConfirm=int(cj['curConfirm']) - except: - citysinfo.curConfirm = 0 + citysinfo.died=int(self.dealData(cj,'died')) + citysinfo.cured=int(self.dealData(cj,'crued')) + citysinfo.confirmedRelative=int(self.dealData(cj,'confirmedRelative')) + citysinfo.curConfirm=int(self.dealData(cj,'curConfirm')) province.subList.append(citysinfo) ctuple=(citysinfo.city,province.area,citysinfo.confirmed,citysinfo.died,citysinfo.cured,citysinfo.confirmed, citysinfo.curConfirm,province.pub_date) @@ -122,8 +110,6 @@ class Spider(): foreignTuples.append(ftuples) return foreignTuples - - #抓取国外疫情信息概况 def grapSummaryForeign(self): data=requests.get(self.url,headers=self.headers) @@ -152,7 +138,6 @@ class Spider(): Outside.cured,Outside.curedRelative,Outside.died,Outside.diedRelative, Outside.updatedTime) - #抓取国内疫情信息概况 def grapSummaryChina(self): data=requests.get(self.url,headers=self.headers) diff --git a/数据存储/Grapmain.py b/数据存储/Grapmain.py index f2029dc..7f819e9 100644 --- a/数据存储/Grapmain.py +++ b/数据存储/Grapmain.py @@ -1,16 +1,24 @@ from GrapData import Spider,SaveToDB -def main(): +#抓取所有数据填入数据库 +def updateDB(): spider=Spider() + saveDb=SaveToDB() spider.url='https://voice.baidu.com/act/newpneumonia/newpneumonia/' - #spider.grapProvince() - savedb=SaveToDB() - #foreignData=spider.grapForeign() - #国内疫情概述 - #InsideData=spider.grapSummaryChina() - #国外疫情汇总 + proData,cityData=spider.grapProvince() + saveDb.InsertIntoProvince(proData) + saveDb.InsertToCity(cityData) + foreignData = spider.grapForeign() + saveDb.InsertForeignCountry(foreignData) + InsideData=spider.grapSummaryChina() + saveDb.InsertSummaryChina(InsideData) OutsideData=spider.grapSummaryForeign() - savedb.InsertSummaryForeign(OutsideData) + saveDb.InsertSummaryForeign(OutsideData) + + + +def main(): + updateDB() if __name__ == '__main__': main() \ No newline at end of file diff --git a/数据存储/__pycache__/DataFrame.cpython-36.pyc b/数据存储/__pycache__/DataFrame.cpython-36.pyc index b81aae0c07d1a491975ea6c05d65eca0482f4450..db698518d99a1dfdd055c7f81800edaf60c8100f 100644 GIT binary patch delta 72 zcmV-O0Js0{L+(QfGYt(2p;jti&9MnB7Xgp6Hx~&40im-&84dygv9nMcj{*V2v)~=% e0Rh#M;2jl{+8_S`=abqXodp&M5(p5Jd?9jxQW+}% delta 72 zcmV-O0Js0{L+(QfGYt(2XH_a+Sg{E$7Xgm5Hx~&40iUx$84dyguCq`Zj{*U~v)~=% e0RhsJ;2jl{+8_S`Lt#lcLk(vQR~l0=gQmjddt5S`C$M@mZhpX~&cv8CnS;|; zn2S+>k&BTHi20a=7{PFJ6{kHjW60)RJWR}tMVt5X%djx^O}-_3gK_KR%_4^x_fIYt Qoy6+GA;%#znNKVS0B^ZK{Qv*} delta 374 zcmbQL^Gk=zn3tF9TBufhjq*mWSSBt31_lOaATItlxsWN3x2uFZg}a&2g<%3yY|Z93 zOhJr16BsLGY6O5HjIlzK-B_&ZYlKn+BpFiVvzaC^6{)3g)$qrQN zf?@Ir7FWqj;S42eDGJSuj0}Y(;S4pLHC$;-!3>&;lPkGoHaoF;GqM-40=-&gJ$WLV z<>nJ?Zx|VCCU52NRpnz8VB}(C17bcVK1L28hLKEQmJm>mXR|1$Ju_p%=1?9cX2yoi t5&SYNY;!>&XE!;PUv=_);dP8hCi{sTW;{1pNOTfw0EYsH?BtcAIRL2-Rs#S4 diff --git a/数据存储/xinguan.sql b/数据存储/xinguan.sql index 1bc349e..e29a84d 100644 --- a/数据存储/xinguan.sql +++ b/数据存储/xinguan.sql @@ -10,7 +10,7 @@ create table ForeignCountry( country varchar(30) unique, curConfirm int, confirmedRelative int, - pub_data DATETIME not null + pub_date DATETIME not null ); create table InsideChina( @@ -56,7 +56,6 @@ create table CityTable( confirmedRelative int, curConfirm int, pub_date DATETIME, - foreign key(provinceName) references ProvinceTable(area) ); create table ProvinceTable( @@ -70,8 +69,9 @@ create table ProvinceTable( asymptomatic int, asymptomaticRelative int, curConfirm int, + curConfirmRelative int, icuDisable int, - area varchar(10) unique, + area varchar(10), pub_date DATETIME not null );