parent
5f42c114ab
commit
262e968fb7
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
|
||||
</project>
|
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/数据存储.iml" filepath="$PROJECT_DIR$/.idea/数据存储.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,102 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="fcea9d71-f4b2-41ab-b680-ecae8402b966" name="Default Changelist" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/xinguan.sql" beforeDir="false" afterPath="$PROJECT_DIR$/xinguan.sql" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/../数据采集/百度数据/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/../数据采集/百度数据/.idea/workspace.xml" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
|
||||
</component>
|
||||
<component name="ProjectId" id="1feziIuC7mkUIgggUnBaTTaY5uL" />
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
|
||||
<component name="ProjectViewState">
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showExcludedFiles" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<configuration name="Grapmain" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="数据存储" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/Grapmain.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.Grapmain" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
<component name="ServiceViewManager">
|
||||
<option name="viewStates">
|
||||
<list>
|
||||
<serviceView>
|
||||
<treeState>
|
||||
<expand />
|
||||
<select />
|
||||
</treeState>
|
||||
</serviceView>
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="SvnConfiguration">
|
||||
<configuration />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="fcea9d71-f4b2-41ab-b680-ecae8402b966" name="Default Changelist" comment="" />
|
||||
<created>1596593471956</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1596593471956</updated>
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="WindowStateProjectService">
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.bottom" timestamp="1596607121816">
|
||||
<screen x="67" y="27" width="1853" height="1053" />
|
||||
</state>
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.bottom/67.27.1853.1053@67.27.1853.1053" timestamp="1596607121816" />
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.center" timestamp="1596607121816">
|
||||
<screen x="67" y="27" width="1853" height="1053" />
|
||||
</state>
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.center/67.27.1853.1053@67.27.1853.1053" timestamp="1596607121816" />
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.left" timestamp="1596607121816">
|
||||
<screen x="67" y="27" width="1853" height="1053" />
|
||||
</state>
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.left/67.27.1853.1053@67.27.1853.1053" timestamp="1596607121816" />
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.right" timestamp="1596607121816">
|
||||
<screen x="67" y="27" width="1853" height="1053" />
|
||||
</state>
|
||||
<state width="1814" height="387" key="GridCell.Tab.0.right/67.27.1853.1053@67.27.1853.1053" timestamp="1596607121816" />
|
||||
<state x="1111" y="25" key="search.everywhere.popup" timestamp="1596606706048">
|
||||
<screen x="67" y="27" width="1853" height="1053" />
|
||||
</state>
|
||||
<state x="1111" y="25" key="search.everywhere.popup/67.27.1853.1053@67.27.1853.1053" timestamp="1596606706045" />
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Twisted Trial" />
|
||||
</component>
|
||||
</module>
|
@ -0,0 +1,202 @@
|
||||
import time
|
||||
|
||||
import requests
|
||||
import re
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
from DataFrame import *
|
||||
|
||||
class Spider():
    """Scrape epidemic statistics embedded in the page at ``self.url``.

    The page is expected to carry a ``<script id="captain-config">`` tag whose
    text contains the JSON fragments this class extracts with regular
    expressions. Callers set ``url`` after construction and then call one of
    the ``grap*`` methods.
    """

    # Format shared by every timestamp string produced by this class.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    # Seconds to wait for the server before giving up on a request.
    _REQUEST_TIMEOUT = 30

    def __init__(self):
        # Target page; must be assigned by the caller before scraping.
        self.url = ''
        self.headers = {"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
        self.updateTime = ''

    def _fetchConfigText(self):
        """Download ``self.url`` and return the text of the ``captain-config`` script tag.

        Raises:
            ValueError: if the page has no ``captain-config`` script or the
                tag has no text content.
        """
        response = requests.get(self.url, headers=self.headers,
                                timeout=self._REQUEST_TIMEOUT)
        response.encoding = 'utf-8'
        # Build the soup object and look up the target tag.
        soup = BeautifulSoup(response.text, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        if tag is None or tag.string is None:
            raise ValueError(
                "no <script id='captain-config'> content found at %r" % self.url)
        return tag.string

    def _now(self):
        """Return the current local time formatted with ``_TIME_FORMAT``."""
        return time.strftime(self._TIME_FORMAT, time.localtime())

    # Scrape the figures for every domestic province and its cities.
    def grapProvince(self):
        """Scrape per-province and per-city figures.

        Returns:
            A ``(provinceTuples, cityTuples)`` pair of lists. Each province
            tuple is ``(confirmed, confirmedRelative, died, diedRelative,
            crued, curedRelative, asymptomatic, asymptomaticRelative,
            curConfirm, curConfirmRelative, icuDisable, area, pub_date)``.
            Each city tuple is ``(city, area, confirmed, died, cured,
            confirmedRelative, curConfirm, pub_date)``.

        Raises:
            ValueError: if the ``captain-config`` script is missing.
            IndexError: if the ``caseList`` section is not found in the script.
        """
        tagstr = self._fetchConfigText()
        # Extract the section holding every province's figures.
        caselist = re.findall(r'"caseList":\[(\{.*?\})\],"dataSource"', tagstr)
        # Split that section into one JSON object string per province.
        provincesinfo = re.findall(r'({"confirmed".*?\]})', caselist[0])

        provinceTuples = []
        cityTuples = []
        # Iterate over whatever was matched instead of assuming exactly 34 entries.
        for rawProvince in provincesinfo:
            pro = json.loads(rawProvince)
            province = Porvince()
            province.area = pro['area']
            province.confirmedRelative = int(pro['confirmedRelative'])
            province.confirmed = int(pro['confirmed'])
            # 'crued' is the key as spelled in the scraped data.
            province.crued = int(pro['crued'])
            province.died = int(pro['died'])
            province.curConfirmRelative = int(pro['curConfirmRelative'])
            province.curConfirm = int(pro['curConfirm'])
            province.diedRelative = int(pro['diedRelative'])
            province.curedRelative = int(pro['curedRelative'])
            province.asymptomatic = int(self.dealData(pro, 'asymptomatic'))
            province.asymptomaticRelative = int(self.dealData(pro, 'asymptomaticRelative'))
            province.icuDisable = int(pro['icuDisable'])
            province.relativeTime = self.changeTime(pro['relativeTime'])
            province.pub_date = self._now()

            # Pack the fields into a tuple so they are easy to insert into the database.
            provinceTuples.append((
                province.confirmed, province.confirmedRelative,
                province.died, province.diedRelative,
                province.crued, province.curedRelative,
                province.asymptomatic, province.asymptomaticRelative,
                province.curConfirm, province.curConfirmRelative,
                province.icuDisable, province.area, province.pub_date,
            ))

            province.subList = []
            print(province)
            # Process the city entries and attach them to this province.
            # json.loads already produced a list of dicts, so it is walked
            # directly rather than being re-serialised and re-parsed.
            for cj in pro['subList']:
                cityinfo = CityInfo()
                cityinfo.city = cj['city']
                cityinfo.confirmed = int(cj['confirmed'])
                # Missing or empty values are treated as 0.
                cityinfo.died = int(self.dealData(cj, 'died'))
                cityinfo.cured = int(self.dealData(cj, 'crued'))
                cityinfo.confirmedRelative = int(cj['confirmedRelative'])
                try:
                    cityinfo.curConfirm = int(self.dealData(cj, 'curConfirm'))
                except (TypeError, ValueError):
                    # Best effort: an unparsable value counts as 0.
                    cityinfo.curConfirm = 0
                province.subList.append(cityinfo)
                # NOTE(review): the sixth slot used to repeat `confirmed`;
                # `confirmedRelative` looks like the intended value -- confirm
                # against the column order of the city insert statement.
                cityTuples.append((
                    cityinfo.city, province.area, cityinfo.confirmed,
                    cityinfo.died, cityinfo.cured, cityinfo.confirmedRelative,
                    cityinfo.curConfirm, province.pub_date,
                ))
                print(cityinfo)
            print('-----------------------------------------------------------')
        return (provinceTuples, cityTuples)

    # Scrape the figures for foreign countries.
    def grapForeign(self):
        """Scrape per-country figures.

        Returns:
            A list of tuples ``(confirmed, died, crued, country, curConfirm,
            confirmedRelative, pub_date)``, one per matched country.

        Raises:
            ValueError: if the ``captain-config`` script is missing.
            IndexError: if the ``globalList`` section is not found in the script.
        """
        tagstr = self._fetchConfigText()
        globallist = re.findall(r'"globalList":\[(.*?)\],"allForeignTrend"', tagstr)
        countries = re.findall(r'({"died":.*?"country".*?\})', globallist[0])

        foreignTuples = []
        for country in countries:
            coun = json.loads(country)
            foreign = ForeignCountry()
            foreign.died = int(coun['died'])
            foreign.confirmed = int(coun['confirmed'])
            foreign.crued = int(coun['crued'])
            foreign.country = coun['country']
            foreign.curConfirm = int(coun['curConfirm'])
            foreign.confirmedRelative = int(coun['confirmedRelative'])
            foreign.pub_date = self._now()
            foreignTuples.append((
                foreign.confirmed, foreign.died, foreign.crued, foreign.country,
                foreign.curConfirm, foreign.confirmedRelative, foreign.pub_date,
            ))
        return foreignTuples

    # Scrape the summary of the situation outside the country.
    def grapSummaryForeign(self):
        """Scrape the ``summaryDataOut`` totals, print them and return them.

        Returns:
            The populated ``OutsideChina`` object.

        Raises:
            ValueError: if the ``captain-config`` script is missing.
            IndexError: if ``summaryDataOut`` is not found in the script.
        """
        tagstr = self._fetchConfigText()
        # Summary object for the overseas figures.
        summaryDataOut = re.findall(r'"summaryDataOut":(\{.*?\})', tagstr)
        # Convert to a dict.
        DataOut = json.loads(summaryDataOut[0])
        Outside = OutsideChina()
        Outside.confirmed = int(DataOut['confirmed'])
        Outside.curConfirm = int(DataOut['curConfirm'])
        Outside.confirmedRelative = int(DataOut['confirmedRelative'])
        Outside.cured = int(DataOut['cured'])
        Outside.curedRelative = int(DataOut['curedRelative'])
        Outside.died = int(DataOut['died'])
        Outside.diedRelative = int(DataOut['diedRelative'])
        Outside.updatedTime = self._now()
        print(Outside)
        return Outside

    # Scrape the summary of the domestic situation.
    def grapSummaryChina(self):
        """Scrape the ``summaryDataIn`` totals, print them and return them.

        Returns:
            The populated ``InsideChina`` object.

        Raises:
            ValueError: if the ``captain-config`` script is missing.
            IndexError: if ``summaryDataIn`` is not found in the script.
        """
        tagstr = self._fetchConfigText()
        # Summary object for the domestic figures.
        summaryDataIn = re.findall(r'"summaryDataIn":(\{.*?\})', tagstr)
        # Convert to a dict.
        DataIn = json.loads(summaryDataIn[0])
        Inside = InsideChina()
        # Read the current-case count from its own key; it used to be
        # populated from 'confirmed', duplicating Inside.confirmed.
        Inside.curConfirm = int(DataIn['curConfirm'])
        Inside.died = int(DataIn['died'])
        Inside.cured = int(DataIn['cured'])
        Inside.asymptomatic = int(DataIn['asymptomatic'])
        Inside.asymptomaticRelative = int(DataIn['asymptomaticRelative'])
        Inside.unconfirmed = int(DataIn['unconfirmed'])
        Inside.unconfirmedRelative = int(DataIn['unconfirmedRelative'])
        Inside.confirmed = int(DataIn['confirmed'])
        Inside.confirmedRelative = int(DataIn['confirmedRelative'])
        Inside.curedRelative = int(DataIn['curedRelative'])
        Inside.diedRelative = int(DataIn['diedRelative'])
        Inside.icu = int(DataIn['icu'])
        Inside.icuRelative = int(DataIn['icuRelative'])
        Inside.overseasInput = int(DataIn['overseasInput'])
        Inside.overseasInputRelative = int(DataIn['overseasInputRelative'])
        Inside.curConfirmRelative = int(DataIn['curConfirmRelative'])
        Inside.updatedTime = self._now()
        print(Inside)
        return Inside

    def dealData(self, a, b):
        """Return ``a[b]``, or 0 when the entry is missing or an empty string.

        Args:
            a: Container to read from (a dict in the visible callers).
            b: Key to look up.
        """
        try:
            value = a[b]
        except (KeyError, IndexError, TypeError):
            return 0
        return 0 if value == '' else value

    # Convert the time format.
    def changeTime(self, t):
        """Format timestamp ``t`` as a local ``YYYY-mm-dd HH:MM:SS`` string.

        ``t`` is divided by 1000 before conversion.
        NOTE(review): this assumes ``t`` is in milliseconds -- confirm against
        the scraped ``relativeTime`` values.
        """
        localt = time.localtime(float(t) / 1000)
        return time.strftime(self._TIME_FORMAT, localt)
|
||||
|
@ -0,0 +1,12 @@
|
||||
from GrapData import Spider,SaveToDB
|
||||
|
||||
def main():
    """Scrape the epidemic page and store the per-country figures.

    The province/city scrape is run first; its return value is not stored
    here, so only its printed output is visible. The per-country tuples are
    then handed to ``SaveToDB.InsertForeignCountry``.
    """
    crawler = Spider()
    crawler.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/'
    crawler.grapProvince()

    # The storage object is created before the foreign data is fetched.
    store = SaveToDB()
    store.InsertForeignCountry(crawler.grapForeign())


if __name__ == "__main__":
    main()
|
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in new issue