|
|
|
@ -70,7 +70,6 @@ class Spider():
|
|
|
|
|
|
|
|
|
|
# 获取国内疫情
|
|
|
|
|
def grapchina(self):
|
|
|
|
|
|
|
|
|
|
data=requests.get(self.url,headers=self.headers)
|
|
|
|
|
data.encoding='utf-8'
|
|
|
|
|
html = data.content.decode('utf-8')
|
|
|
|
@ -80,38 +79,53 @@ class Spider():
|
|
|
|
|
tag=soup.find('script',attrs={'id':'getAreaStat'})
|
|
|
|
|
yiqinginfo=str(tag)
|
|
|
|
|
|
|
|
|
|
provinceNames=re.findall(r'"provinceName":"(.*?)",',yiqinginfo)
|
|
|
|
|
print(provinceNames)
|
|
|
|
|
print(len(provinceNames))
|
|
|
|
|
provinceShortName=re.findall(r'"provinceShortName":"(.*?)",',yiqinginfo)
|
|
|
|
|
print(provinceShortName)
|
|
|
|
|
currentConfirmedCounts=re.findall(r'"currentConfirmedCount":(.*?),"',yiqinginfo)
|
|
|
|
|
print(currentConfirmedCounts)
|
|
|
|
|
print(len(currentConfirmedCounts))
|
|
|
|
|
confirmedCounts=re.findall(r'"confirmedCount":(.*?),',yiqinginfo)
|
|
|
|
|
print(confirmedCounts)
|
|
|
|
|
suspectedCounts=re.findall(r'"suspectedCount":(.*?),',yiqinginfo)
|
|
|
|
|
print(suspectedCounts)
|
|
|
|
|
curedCounts=re.findall(r'"curedCount":(.*?),',yiqinginfo)
|
|
|
|
|
print(curedCounts)
|
|
|
|
|
deadCounts=re.findall(r'"deadCount":(.*?),',yiqinginfo)
|
|
|
|
|
print(deadCounts)
|
|
|
|
|
comments=re.findall(r'"comment":(.*?),',yiqinginfo)
|
|
|
|
|
print(comments)
|
|
|
|
|
locationIds=re.findall(r'"locationId":(.*?),',yiqinginfo)
|
|
|
|
|
print(locationIds)
|
|
|
|
|
statisticsDatas=re.findall(r'"statisticsData":"(.*?)",',yiqinginfo)
|
|
|
|
|
print(statisticsDatas)
|
|
|
|
|
cities=re.findall(r'"cities":(.*?)}',yiqinginfo)
|
|
|
|
|
print(cities)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(tag)
|
|
|
|
|
#各省信息
|
|
|
|
|
provinceinfo=re.findall(r'({"provinceName":.*?),{"provinceName"',yiqinginfo)
|
|
|
|
|
#遍历所有省的疫情信息,保存在一个列表中
|
|
|
|
|
provinces=[]
|
|
|
|
|
for pro in provinceinfo:
|
|
|
|
|
#转换成字典格式之后就可以直接获取其成员了
|
|
|
|
|
province=Provinceinfo()
|
|
|
|
|
j=json.loads(pro)
|
|
|
|
|
#录入省的疫情信息
|
|
|
|
|
province.provinceName=j['provinceName']
|
|
|
|
|
province.provinceShortName=j['provinceShortName']
|
|
|
|
|
province.currentConfirmedCount=j['currentConfirmedCount']
|
|
|
|
|
province.confirmedCount=j['confirmedCount']
|
|
|
|
|
province.suspectedCount=j['suspectedCount']
|
|
|
|
|
province.deadCount=j['deadCount']
|
|
|
|
|
province.curedCount=j['curedCount']
|
|
|
|
|
|
|
|
|
|
cities=re.findall(r'({.*?})',str(j['cities']))
|
|
|
|
|
for city in cities:
|
|
|
|
|
#json里面引号必须是“,否则报错
|
|
|
|
|
city=str(city).replace('\'','\"')
|
|
|
|
|
#转换成字典格式就很容易获取各个字段信息了
|
|
|
|
|
cj=json.loads(city)
|
|
|
|
|
cityinfo=Cityinfo()
|
|
|
|
|
cityinfo.cityName=cj['cityName']
|
|
|
|
|
cityinfo.currentConfirmedCount=cj['currentConfirmedCount']
|
|
|
|
|
cityinfo.confirmedCount=cj['confirmedCount']
|
|
|
|
|
cityinfo.curedCount=cj['curedCount']
|
|
|
|
|
cityinfo.suspectedCount=cj['suspectedCount']
|
|
|
|
|
cityinfo.deadCount=cj['deadCount']
|
|
|
|
|
province.cities.append(cityinfo)
|
|
|
|
|
provinces.append(province)
|
|
|
|
|
return provinces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 获取国外疫情
|
|
|
|
|
def grapforign(self):
    """Download the page at ``self.url`` and locate the overseas-data script tag.

    The page is requested with ``self.headers``, decoded as UTF-8, parsed
    with BeautifulSoup, and the ``<script>`` element whose id is
    ``getListByCountryTypeService2true`` is looked up and stringified.

    NOTE(review): the stringified tag is computed but never returned or
    stored, so this method currently yields ``None`` -- it looks
    unfinished; confirm the intended return value with the author.
    """
    response = requests.get(self.url, headers=self.headers)
    response.encoding = 'utf-8'
    page = response.content.decode('utf-8')

    # Build the soup object, then search it for the embedded data script.
    parsed = BeautifulSoup(page, 'html.parser')
    script_tag = parsed.find(
        'script', attrs={'id': 'getListByCountryTypeService2true'})

    # str(None) == 'None' when the tag is missing; no error is raised here.
    yiqinginfo = str(script_tag)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#news类用来存储实时新闻的信息
|
|
|
|
|
class news():
|
|
|
|
@ -124,3 +138,49 @@ class news():
|
|
|
|
|
self.summay=summary
|
|
|
|
|
self.sourceUrl=sourceUrl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#存储各市的疫情信息
|
|
|
|
|
class Cityinfo():
    """Epidemic counters for a single city.

    Every field is a plain public attribute that starts at an empty/zero
    value and is meant to be filled in by the caller after construction.
    """

    # The constructor was previously misspelled ``__int__``; Python never
    # invoked it, so new instances had no attributes at all.
    def __init__(self):
        # City name
        self.cityName = ''
        # Currently confirmed cases
        self.currentConfirmedCount = 0
        # Cumulative confirmed cases
        self.confirmedCount = 0
        # Suspected cases
        self.suspectedCount = 0
        # Cured cases
        self.curedCount = 0
        # Deaths
        self.deadCount = 0

    def __str__(self):
        """Return a one-line, human-readable summary of all counters."""
        return (self.cityName
                + " 当前确诊:" + str(self.currentConfirmedCount)
                + " 累计确诊:" + str(self.confirmedCount)
                + "疑似:" + str(self.suspectedCount)
                + " 治愈:" + str(self.curedCount)
                + " 死亡人数:" + str(self.deadCount))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#存储省的疫情信息
|
|
|
|
|
class Provinceinfo():
    """Epidemic counters for one province plus the records of its cities.

    The class used to define ``__init__`` twice; the later zero-argument
    definition silently replaced the parameterised one. Both are merged
    here: calling ``Provinceinfo()`` still yields an empty record, and the
    fields may also be supplied positionally or by keyword.

    Parameters:
        provinceName: full province name.
        provinceShortName: short province name.
        currentConfirmedCount: currently confirmed cases.
        confirmedCount: cumulative confirmed cases.
        suspectedCount: suspected cases.
        curedCount: cured cases.
        deadCount: deaths.
        cities: list of per-city records; a fresh empty list is created
            when omitted so instances never share one list.
    """

    def __init__(self, provinceName='', provinceShortName='',
                 currentConfirmedCount=0, confirmedCount=0,
                 suspectedCount=0, curedCount=0, deadCount=0, cities=None):
        self.provinceName = provinceName
        self.provinceShortName = provinceShortName
        self.currentConfirmedCount = currentConfirmedCount
        self.confirmedCount = confirmedCount
        self.suspectedCount = suspectedCount
        self.curedCount = curedCount
        self.deadCount = deadCount
        # Per-city epidemic records, kept in a list. ``None`` is used as
        # the default sentinel to avoid a shared mutable default argument.
        self.cities = [] if cities is None else cities
|
|
|
|
|