From df7a2232faf33e2da0acb5e3db98bf1f73345d6f Mon Sep 17 00:00:00 2001 From: yangxudongll <1593744164@qq.com> Date: Mon, 3 Aug 2020 14:52:47 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E5=9B=BD=E5=A4=96=E7=96=AB?= =?UTF-8?q?=E6=83=85=E4=BF=A1=E6=81=AF=E7=9A=84=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 数据采集/GrapSpider.py | 49 +++++++++++++++++- .../__pycache__/GrapSpider.cpython-36.pyc | Bin 4640 -> 6099 bytes 数据采集/grapdata.py | 5 +- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/数据采集/GrapSpider.py b/数据采集/GrapSpider.py index 3a9d0e1..25abf14 100644 --- a/数据采集/GrapSpider.py +++ b/数据采集/GrapSpider.py @@ -14,6 +14,10 @@ class Spider(): self.news=[] #网站消息更新时间 self.modifytime='' + #国外疫情信息 + self.foreignCountry=[] + #国内疫情信息 + self.provinces=[] # 获取实时新闻 @@ -66,7 +70,7 @@ class Spider(): timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) self.modifytime=timestr - print(timestr) + # 获取国内疫情 def grapchina(self): @@ -111,6 +115,8 @@ class Spider(): cityinfo.deadCount=cj['deadCount'] province.cities.append(cityinfo) provinces.append(province) + + self.provinces=provinces return provinces @@ -124,7 +130,27 @@ class Spider(): #查找数据 tag=soup.find('script',attrs={'id':'getListByCountryTypeService2true'}) yiqinginfo=str(tag) - + # 使用正则表达式匹配 + results = re.findall(r'(\{"id".*?"showRank".*?\})', yiqinginfo) + all_countries = [] + for item in results: + country = ForeignCountry() + itemJson = json.loads(item) + country.provinceName = itemJson['provinceName'] + country.provinceShortName = itemJson['provinceShortName'] + country.currentConfirmedCount=int(itemJson['currentConfirmedCount']) + country.confirmedCount = int(itemJson['confirmedCount']) + country.suspectedCount = int(itemJson['suspectedCount']) + country.curedCount = int(itemJson['curedCount']) + country.deadCount = int(itemJson['deadCount']) + country.deadRate = float(itemJson['deadRate']) + country.countryShortCode = itemJson['countryShortCode'] + country.countryFullName = itemJson['countryFullName'] + country.continents = itemJson['continents'] + all_countries.append(country) + + self.foreignCountry=all_countries + return all_countries #news类用来存储实时新闻的信息 @@ -184,3 +210,22 @@ class Provinceinfo(): self.deadCount = 0 # 各个市的疫情信息,通过一个字典存储 self.cities = [] + +#国外疫情信息存储类 +class ForeignCountry: + def __init__(self): + self.provinceName = '' + self.provinceShortName = '' + self.currentConfirmedCount = 0 + self.confirmedCount = 0 + self.suspectedCount = 0 + self.curedCount = 0 + self.deadCount = 0 + self.deadRate = 0 + self.countryShortCode = '' + self.countryFullName = '' + self.continents = '' + def __str__(self): + return '%s,当前确诊:%d,累计确诊:%d,疑似:%d, 治愈: %d, 死亡: %d, 死亡率: %s,洲: %s'\ + %(self.provinceName,self.currentConfirmedCount,self.confirmedCount, + self.suspectedCount, self.curedCount, self.deadCount, self.deadRate,self.continents) \ No newline at end of file diff --git a/数据采集/__pycache__/GrapSpider.cpython-36.pyc b/数据采集/__pycache__/GrapSpider.cpython-36.pyc index ceabb0503004fb55935ba4cc5c6da433242e3ef1..2b38cfc401ac93aca65ee25a4f1be7ca86c31e65 100644 GIT binary patch delta 1909 zcmbuA&u<$=6vubgj@RB_wiEw|)3|kh$8DN6G!-PNL`qVFN^wI|6v+)(Zg$%`B(^%c zDKxn9Ara+=idse>dLvS~L93*_(1t4#$I{zMhy!g>xbPPs@!r_v7qNtdSMRg$&5U>7 zy!p=T*X>UZwH?tk`TYH^?4A}u_)XaIbAdk$W?i|}b}{s-U?_&VC>ZL3n2`-KE+n<( zn~QGZ+z;1%=pGBgNLr7mLwFDkh!7%-5D^hX3=u^%0vvUW7O&(BQ)a$!p-AJ{zL&KG z8kHB&J%Z?Gx*XmA61qnb&48qz`p|DjbRaqrT>yujmSZHt@^YLUW!L2|bOl=7U?X5w zFH1|4(zhkgsPOh3wk&m)?O;hZl$fwUO1^A8kJM_^z$2|1g?OY_qcD&B)hJ>|;VFS? z)ChhsCcyJz;MH@l3A_gG?I}sq%_ZLgA;RT%CR&8|BP9v4(XaXJc%}I!xvWkIcH9UV z;gG;YcW0SukUiBA0YH*aKiuYPm z^~6I|Q5NfoS5%3c$Pml?(#hskZ4qUnGgCRP9E88pVZ=$cto+%RM)x#g4B$xC-0W(E{!L#&{^)x>wyuGM3|XszU% z&-Fj{@iBbM)*2Z1ST@aOS6Gif5#I6MaeVn%e0jqVxOc!T+=yP{)#woG(+;y$ze)z! zPyUGm8_QaIMREHlebk2sME7zLRK;e(97xLlnnt+PFI>@2`It2D4SxgJ?otk#ytCZ zo=-3lJXHnI*1xa$==M#-mIIM89-I6i>;?z(nijzD4p~pTz}#SUU>-2(f;WO$i|M&@ zFuud;dG-T)_VJDNrMu7XeKOc#^sGO)_x$eCmiWtCPghs5C$YBjXl?PM!36ied-U|- z(qLkEGg$xZI`mmRYY$fNp7d6Vv8&MF-Ic98inBjK&qBkmU>1IC1R@fZ%tyA*wl|yQ z%1kHg=nh@tMycrXR0`am!T!;uO0fkNhMzYH@cA+3xXxSsm9H58cDb-aLZGlIs)D9K c=u!4p(BFvPJ-qotpsg(Zb^SHY` zs%by9p=U(&84>T!%j)gunp)DFWdH5cuv-37jqHT4XT(NXAOb`I3&;U^AOzTe1lQ;Q zP!RaWRoZya?X7mYy;~bJhVwFTQoxVx((E+6V$`{JcV)HX$J^b_t-$pRyz0O~fs>{L z^eRvTjsnL7%X~vGDxV+fWmV^0eYd3K6w(&)!h$tTT2hOobKfrVSu-0!y&69;Tgelm z&;?+IrwXIIXMQhU#OEboPTQ81{o^OcTYh1w3l&%cIiK7qpsUkWdls|IQs}EK& gSU~JG#Q=