From 16ec5ced81a2cae7269ff7ed2ef1421ae253ef0d Mon Sep 17 00:00:00 2001 From: egg23333 <806261011@qq.com> Date: Sat, 15 Aug 2020 17:12:44 +0800 Subject: [PATCH] rename files,add sql module,add automatic update module editor:chenzw --- CaseInland.py | 120 --------------------- README.txt | 0 collectData.py | 49 +++++++++ createDB.py | 74 +++++++++++++ insideData.py | 218 ++++++++++++++++++++++++++++++++++++++ Di.py => insideSummary.py | 64 ++++++----- newsData.py | 97 +++++++++++++++++ outsideData.py | 142 +++++++++++++++++++++++++ outsideSummary.py | 103 ++++++++++++++++++ printCurData.py | 15 +++ 国外概况class.py | 25 ----- 国外疫情.py | 103 ------------------ 新闻class.py | 33 ------ 爬新闻main.py | 24 ----- 爬概况main.py | 34 ------ 15 files changed, 733 insertions(+), 368 deletions(-) delete mode 100644 CaseInland.py delete mode 100644 README.txt create mode 100644 collectData.py create mode 100644 createDB.py create mode 100644 insideData.py rename Di.py => insideSummary.py (74%) create mode 100644 newsData.py create mode 100644 outsideData.py create mode 100644 outsideSummary.py create mode 100644 printCurData.py delete mode 100644 国外概况class.py delete mode 100644 国外疫情.py delete mode 100644 新闻class.py delete mode 100644 爬新闻main.py delete mode 100644 爬概况main.py diff --git a/CaseInland.py b/CaseInland.py deleted file mode 100644 index 5944ef1..0000000 --- a/CaseInland.py +++ /dev/null @@ -1,120 +0,0 @@ -import requests -import re -from bs4 import BeautifulSoup -import json -import time - -class City: - def __init__(self,tempjs={},city='无数据',confiemed='无数据',died='无数据',crued='无数据',confirmedRelative='无数据',cityCode='无数据'): - self.city=city - self.confirmed=confiemed - self.died=died - self.crued=crued - self.confirmedRelative=confirmedRelative - self.cityCode=cityCode - js={'city': '无数据', 'confirmed': '无数据', 'died': '无数据', 'crued': '无数据', 'confirmedRelative': '无数据', 'curConfirm': '无数据', 'cityCode': '无数据'} - for k,v in tempjs.items(): - if tempjs[k]!='': - js[k]=v - self.city=js["city"] - self.confirmed=js["confirmed"] - self.died=js["died"] - self.crued=js["crued"] - self.confirmedRelative=js["confirmedRelative"] - self.cityCode=js["cityCode"] - def __str__(self): - if self.city=='境外输入': - return "其它情况:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, - self.confirmed,self.died,self.crued) - return "城市名:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, - self.confirmed,self.died,self.crued) - -class Provinces: - def __init__(self): - self.number=0 - self.province=[] - def addProvince(self,province): - self.province.append(province) - def printEveryProvince(self): - for p in self.province: - print(p) - def __str__(self): - return str(self.province) - - -class Province: - def __init__(self,tempjs={},confirmed='无数据',died='无数据',crued='无数据',relativeTime='无数据',confirmedRelative='无数据', - diedRelative='无数据',curedRelative='无数据',asymptomaticRelative='无数据',asymptomatic='无数据', - curConfirm='无数据',curConfirmRelative='无数据',icuDisable='无数据',area='无数据',subList=[]): - self.died=died - self.confirmed=confirmed - self.crued=crued - self.relativeTime=relativeTime - self.confirmedRelative=confirmedRelative - self.diedRelative=diedRelative - self.curedRelative=curedRelative - self.asymptomaticRelative=asymptomaticRelative - self.asymptomatic=asymptomatic - self.curConfirm=curConfirm - self.curConfirmRelative=curConfirmRelative - self.icuDisable=icuDisable - self.area=area - self.subList=subList - js={'confirmed': '无数据', 'died': '无数据', 'crued': '无数据', 'relativeTime': '无数据', 'confirmedRelative': '无数据', - 'diedRelative': '无数据', 'curedRelative': '无数据', 'asymptomaticRelative': '无数据', 'asymptomatic': '无数据', 'curConfirm': '无数据', - 'curConfirmRelative': '无数据', 'icuDisable': '无数据', 'area': '无数据', 'subList':[]} - for k,v in tempjs.items(): - if tempjs[k]!='': - js[k]=v - #print(js) - self.died=js['died'] - self.confirmed=js['confirmed'] - self.crued=js['crued'] - self.relativeTime=js['relativeTime'] - self.confirmedRelative=js['confirmedRelative'] - self.diedRelative=js['diedRelative'] - self.curedRelative=js['curedRelative'] - self.asymptomaticRelative=js['asymptomaticRelative'] - self.asymptomatic=js['asymptomatic'] - self.curConfirm=js['curConfirm'] - self.curConfirmRelative=js['curConfirmRelative'] - self.icuDisable=js['icuDisable'] - self.area=js['area'] - self.subList=js['subList'] - cities=js["subList"] - for c in cities: - city=City(c) - subList.append(city) - def printEveryCity(self): - for c in self.subList: - city=City(c) - print(city) - def __str__(self): - localobj=time.localtime(int(self.relativeTime)) - newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) - return "省份:{:<4}现有患者:{:<4}现有新增患者:{:<4}累积无症状感染者:{:<4}新增无症状感染者:{:<4}累积确诊:{:<6}新增确诊:{:<4}累积治愈:{:<6}新增治愈:{:<4}累积死亡:{:<6}新增死亡:{:<4}累积重症:{:<4}更新时间:{:<10}".format(self.area,self.curConfirm,self.curConfirmRelative, - self.asymptomatic,self.asymptomaticRelative,self.confirmed,self.confirmedRelative,self.crued,self.curedRelative,self.died,self.diedRelative, - self.icuDisable,newtime) - - -def start(): - res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') - res=res.text - #script type="application/json" - soup=BeautifulSoup(res,'html.parser') - tag=soup.find('script',attrs={'id':'captain-config'}) - tagstr=str(tag) - findinland=re.findall('caseList".*"caseOutsideList',tagstr)[0] - province=re.findall('\{"confirmed.*?subList":\[.*?\]\}',findinland) - provinces=Provinces() - for item in province: - js=json.loads(item) - p=Province(js) - provinces.addProvince(p) - print(p) - p.printEveryCity() - print("++++++++++++++++++++++++++++++"*4) - - #provinces.printEveryProvince() - - diff --git a/README.txt b/README.txt deleted file mode 100644 index e69de29..0000000 diff --git a/collectData.py b/collectData.py new file mode 100644 index 0000000..313a552 --- /dev/null +++ b/collectData.py @@ -0,0 +1,49 @@ +import time,threading +#password填自己mysql的密码 +import pymysql,outsideData,newsData,insideSummary,outsideSummary,insideData +con=pymysql.connect(host='localhost',passwd='123456',charset='utf8',user='root',db='cov2019_data') +cursor=con.cursor() + +#多线程方案 +# th=[] + +# thread_insideData = threading.Thread(target=insideData.mysqlStart, args=(con, )) +# th.append(thread_insideData) + +# thread_outsideData = threading.Thread(target=outsideData.mysqlStart, args=(con, )) +# th.append(thread_outsideData) + +# thread_insideSummary = threading.Thread(target=insideSummary.mysqlStart, args=(con, )) +# th.append(thread_insideSummary) + +# thread_outsideSummary = threading.Thread(target=outsideSummary.mysqlStart, args=(con, )) +# th.append(thread_outsideSummary) + +# thread_news=threading.Thread(target=newsData.mysqlStart,args=(con,)) +# th.append(thread_news) +#多线程方案启动 +# while True: +# for t in th: +# t.run() +# for t in th: +# t.join() + +#单一线程方案 +def collect(con): + while True: + newsData.mysqlStart(con) + insideData.mysqlStart(con) + outsideData.mysqlStart(con) + insideSummary.mysqlStart(con) + outsideSummary.mysqlStart(con) + print("***********************每隔一小时自动更新数据***********************") + print("****************************此次更新完毕***************************") + updatetime=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()+3600)) + print("****************下一次启动时间: %s****************"%updatetime) + time.sleep(60*60) +#单一线程方案启动 +t=threading.Thread(target=collect,args=(con,)) +t.start() +t.join() +cursor.close() +con.close() \ No newline at end of file diff --git a/createDB.py b/createDB.py new file mode 100644 index 0000000..e3ce0e2 --- /dev/null +++ b/createDB.py @@ -0,0 +1,74 @@ +#password填自己mysql的密码 +import pymysql +con=pymysql.connect(host='localhost',port=3306,passwd='123456',charset='utf8',user='root') +cursor=con.cursor() +#建库 +sql=''' + create database if not exists cov2019_data + ''' +cursor.execute(sql) +cursor.close() +con.close() +con=pymysql.connect(host='localhost',port=3306,passwd='123456',charset='utf8',user='root',db='cov2019_data') +cursor=con.cursor() +#切换数据库 +cursor.db='cov2019_data' +#建表 +sql='''create table if not exists outsideSummary + (time char(20), + confirmed int(10), + died int(10), + curConfirm int(10), + cured int(10), + confirmedRelative int(10), + curedRelative int(10), + diedRelative int(10), + curConfirmRelative int(10), + primary key(time) + )''' +cursor.execute(sql) +sql='''create table if not exists insideSummary + (InsertTime char(20) ,mapLastUpdatedTime char(20),confirmed int(10),died int(10),cured int(10),asymptomatic int(10),asymptomaticRelative int(10), + unconfirmed int(10),relativeTime int(10),confirmedRelative int(10),unconfirmedRelative int(10), + curedRelative int(10),diedRelative int(10),icu int(10),icuRelative int(10),overseasInput int(10), + unOverseasInputCumulative int(10),overseasInputRelative int(10),unOverseasInputNewAdd int(10), + curConfirm int(10),curConfirmRelative int(10),icuDisable int(10), + primary key(InsertTime,mapLastUpdatedTime) + )''' +cursor.execute(sql) +sql='''create table if not exists insideProvince + (area char(10),relativeTime char(20),died int(10),confirmed int(10),crued int(10),confirmedRelative int(10), + diedRelative int(10),curedRelative int(10),asymptomaticRelative int(10), + asymptomatic int(10),curConfirm int(10),curConfirmRelative int(10),icuDisable int(10), + primary key(area,relativeTime) + )''' +cursor.execute(sql) +sql='''create table if not exists insideCity + (province char(10),city char(10),confirmed int(10),died int(10), + crued int(10),confirmedRelative int(10),relativeTime char(20),cityCode int(10), + primary key(city,province,relativeTime,cityCode) + )''' +cursor.execute(sql) +sql='''create table if not exists outsideCountry + ( + area char(20),relativeTime char(20), + confirmed int(10), + died int(10),crued int(10), + confirmedRelative int(10), + curConfirm int(10), + icuDisable int(10), + primary key(area,relativeTime) + ) + ''' +cursor.execute(sql) +sql='''create table if not exists allNews + (eventDescription char(200), + eventTime char(20), + eventUrl char(200), + siteName char(20), + primary key(eventDescription,eventTime,eventUrl,siteName) + ) + ''' +cursor.execute(sql) +cursor.close() +con.close() diff --git a/insideData.py b/insideData.py new file mode 100644 index 0000000..1df926e --- /dev/null +++ b/insideData.py @@ -0,0 +1,218 @@ +import requests +import re +from bs4 import BeautifulSoup +import json +import time +import pymysql + +class City: + def __init__(self,tempjs={},time='',area='',city='无数据',confirmed=0,died=0,crued=0,confirmedRelative=0,cityCode=0): + self.province=area + self.relativeTime=time + self.city=city + self.confirmed=confirmed + self.died=died + self.crued=crued + self.confirmedRelative=confirmedRelative + self.cityCode=cityCode + js={'city': '无数据', 'confirmed': 0, 'died': 0, 'crued': 0, 'confirmedRelative': 0, 'cityCode': 0} + for k,v in tempjs.items(): + if tempjs[k]!='': + js[k]=v + self.city=str(js["city"]) + self.confirmed=int(js["confirmed"]) + self.died=int(js["died"]) + self.crued=int(js["crued"]) + self.confirmedRelative=int(js["confirmedRelative"]) + self.cityCode=int(js["cityCode"]) + def get_db_tuple(self): + return (self.province,self.city,self.confirmed,self.died,self.crued,self.confirmedRelative,self.relativeTime,self.cityCode) + + def insert_sql(self,con): + sql="""insert into insideCity + (province,city,confirmed,died,crued,confirmedRelative,relativeTime,cityCode) + values(%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert insideCity failed.') + else: + print('Inserted insideCity successfully') + + def __str__(self): + if self.city=='境外输入': + return "其它情况:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, + self.confirmed,self.died,self.crued) + return "城市名:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, + self.confirmed,self.died,self.crued) + +class Provinces: + def __init__(self): + self.number=0 + self.province=[] + def addProvince(self,province): + self.province.append(province) + def printEveryProvince(self): + for p in self.province: + print(p) + def __str__(self): + return str(self.province) + + +class Province: + def __init__(self,tempjs={},confirmed=0,died=0,crued=0,relativeTime=0,confirmedRelative=0, + diedRelative=0,curedRelative=0,asymptomaticRelative=0,asymptomatic=0, + curConfirm=0,curConfirmRelative=0,icuDisable=0,area='',subList=[]): + self.died=died + self.confirmed=confirmed + self.crued=crued + self.relativeTime=relativeTime + self.confirmedRelative=confirmedRelative + self.diedRelative=diedRelative + self.curedRelative=curedRelative + self.asymptomaticRelative=asymptomaticRelative + self.asymptomatic=asymptomatic + self.curConfirm=curConfirm + self.curConfirmRelative=curConfirmRelative + self.icuDisable=icuDisable + self.area=area + self.subList=subList + js={'confirmed': 0, 'died': 0, 'crued': 0, 'relativeTime': 0, 'confirmedRelative': 0, + 'diedRelative': 0, 'curedRelative': 0, 'asymptomaticRelative': 0, 'asymptomatic': 0, 'curConfirm': 0, + 'curConfirmRelative': 0, 'icuDisable': 0, 'area': '', 'subList':[]} + for k,v in tempjs.items(): + if tempjs[k]!='': + js[k]=v + #print(js) + self.died=js['died'] + self.confirmed=js['confirmed'] + self.crued=js['crued'] + self.relativeTime=js['relativeTime'] + self.confirmedRelative=js['confirmedRelative'] + self.diedRelative=js['diedRelative'] + self.curedRelative=js['curedRelative'] + self.asymptomaticRelative=js['asymptomaticRelative'] + self.asymptomatic=js['asymptomatic'] + self.curConfirm=js['curConfirm'] + self.curConfirmRelative=js['curConfirmRelative'] + self.icuDisable=js['icuDisable'] + self.area=js['area'] + self.subList=js['subList'] + cities=js["subList"] + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + for c in cities: + city=City(c,time=newtime,area=self.area) + subList.append(city) + + def get_db_tuple(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + return (self.died,self.confirmed,self.crued,newtime,self.confirmedRelative,self.diedRelative,self.curedRelative,self.asymptomaticRelative,self.asymptomatic,self.curConfirm,self.curConfirmRelative,self.icuDisable,self.area) + + def deleteProvince_sql(self,con): + localobj=time.localtime(int(self.relativeTime)) + cal=time.strftime("%Y-%m-%d %H:%M:%S",localobj)[0:10]+r"%" + sql=""" + delete from insideProvince + where area='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete deleteProvince failed.') + else: + print('delete deleteProvince successfully') + + def deleteCity_sql(self,con): + localobj=time.localtime(int(self.relativeTime)) + cal=time.strftime("%Y-%m-%d %H:%M:%S",localobj)[0:10]+r"%" + sql=""" + delete from insideCity + where province='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print("delete %s'City failed."%self.area) + else: + print("delete %s'City successfully"%self.area) + + def insert_sql(self,con): + sql="""insert into insideProvince + (died,confirmed,crued,relativeTime,confirmedRelative, + diedRelative,curedRelative,asymptomaticRelative, + asymptomatic,curConfirm,curConfirmRelative,icuDisable,area) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + self.deleteProvince_sql(con) + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert insideProvince failed.') + else: + print('Inserted insideProvince successfully') + self.deleteCity_sql(con) + for c in self.subList: + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + city=City(c,newtime,self.area) + city.insert_sql(con) + + + def printEveryCity(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + for c in self.subList: + city=City(c,time=newtime,area=self.area) + print(city) + + def __str__(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + return "省份:{:<4}现有患者:{:<4}现有新增患者:{:<4}累积无症状感染者:{:<4}新增无症状感染者:{:<4}累积确诊:{:<6}新增确诊:{:<4}累积治愈:{:<6}新增治愈:{:<4}累积死亡:{:<6}新增死亡:{:<4}累积重症:{:<4}更新时间:{:<10}".format(self.area,self.curConfirm,self.curConfirmRelative, + self.asymptomatic,self.asymptomaticRelative,self.confirmed,self.confirmedRelative,self.crued,self.curedRelative,self.died,self.diedRelative, + self.icuDisable,newtime) + + +def getinsideData(): + res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') + res=res.text + #script type="application/json" + soup=BeautifulSoup(res,'html.parser') + tag=soup.find('script',attrs={'id':'captain-config'}) + tagstr=str(tag) + findinland=re.findall('caseList".*"caseOutsideList',tagstr)[0] + province=re.findall('\{"confirmed.*?subList":\[.*?\]\}',findinland) + provinces=Provinces() + for item in province: + js=json.loads(item) + p=Province(js) + provinces.addProvince(p) + #print(p) + #p.printEveryCity() + return provinces + +def printStart(): + provinces=getinsideData().province + for p in provinces: + print(p) + p.printEveryCity() + print("+++++++++++++++++"*6) + +def mysqlStart(con): + provinces=getinsideData().province + for p in provinces: + p.insert_sql(con) + + + \ No newline at end of file diff --git a/Di.py b/insideSummary.py similarity index 74% rename from Di.py rename to insideSummary.py index 58b5c7a..79ddf95 100644 --- a/Di.py +++ b/insideSummary.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup import lxml import json import pymysql +import time class summary_cn: def __init__(self): @@ -36,30 +37,48 @@ class summary_cn: self.unOverseasInputCumulative,self.overseasInputRelative,self.unOverseasInputNewAdd, self.curConfirm,self.curConfirmRelative,self.icuDisable,self.mapLastUpdatedTime, datetime.datetime.now().strftime('%Y-%m-%d')) - def insert_sql(con): + + def delete_sql(self,con): + sql=""" + delete from insideSummary + """ + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete inside summary failed.') + else: + print('delete inside summary successfully') + + + def insert_sql(self,con): + self.delete_sql(con) sql=''' - insert into cn_sunmary(confirmed,died,cured,asymptomatic,asymptomaticRelative, - unconfirmed,relativeTime,confirmedRelative,unconfirmedRelative, - curedRelative,diedRelative,icu,icuRelative,overseasInput, - unOverseasInputCumulative,overseasInputRelative,unOverseasInputNewAdd, - curConfirm,curConfirmRelative,icuDisable,mapLastUpdatedTime,InsertTime) values(%d,%d,%d, - %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%s) + insert into insideSummary( + confirmed,died,cured,asymptomatic,asymptomaticRelative, + unconfirmed,relativeTime,confirmedRelative,unconfirmedRelative, + curedRelative,diedRelative,icu,icuRelative,overseasInput, + unOverseasInputCumulative,overseasInputRelative,unOverseasInputNewAdd, + curConfirm,curConfirmRelative,icuDisable,mapLastUpdatedTime,InsertTime) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ''' try: con.cursor().execute(sql,self.get_db_tuple()) con.commit() except Exception as e: print(e) - print('Insert failed.') + print('Insert insideSummary failed.') else: - print('Inserted successfully') + print('Inserted insideSummary successfully') + def __str__(self): string = '截止' + self.mapLastUpdatedTime + '\n' string+= "国内现有\n\t确诊:" + str(self.curConfirm) + '\t较昨日' + str(self.curConfirmRelative) string+="\n\t无症状:" + str(self.asymptomatic) + '\t较昨日' + str(self.asymptomaticRelative) string+='\n\t重症:' + str(self.icu) + '\t较昨日' + str(self.icuRelative) string+='\n\t疑似:' + str(self.unconfirmed) + '\t\t较昨日' + str(self.unconfirmedRelative) - string+='\n累计:\n\t确诊' + str(self.confirmed) + '\n\t治愈' + str(self.cured) + string+='\n累计:\n\t确诊:' + str(self.confirmed) + '\n\t治愈:' + str(self.cured) string+='\n\t死亡' + str(self.died) return string @@ -92,27 +111,14 @@ def getCnSummary(): sum.curConfirm = int(ans['curConfirm']) sum.curConfirmRelative = int(ans['curConfirmRelative']) sum.icuDisable = int(ans['icuDisable']) + ### sum.mapLastUpdatedTime = tagjs['component'][0]['mapLastUpdatedTime'] #print(sum) return sum -def getCnNews(): - b = webdriver.Chrome() - b.get("https://voice.baidu.com/act/newpneumonia/newpneumonia") - txt = b.page_source - b.quit() - soup = BeautifulSoup(txt,'html.parser') - tag1 = soup.select('div[class="Virus_1-1-284_2CVyXP"]') - tag2 = soup.select('div[class="Virus_1-1-284_TB6x3k"] > a[href]') - ans = [] - for i,j in zip(tag1,tag2): - tmp = str(i.string) - tmp+=' ' + j.get('href') - ans.append(tmp) - print(tmp) - return ans - - -if __name__ == "__main__": - getCnSummary() +def printStart(): + print(getCnSummary()) +def mysqlStart(con): + sum=getCnSummary() + sum.insert_sql(con) diff --git a/newsData.py b/newsData.py new file mode 100644 index 0000000..8980874 --- /dev/null +++ b/newsData.py @@ -0,0 +1,97 @@ +import requests +import json +from bs4 import BeautifulSoup +import re +import time +import pymysql + +#定义新闻类 +class InNews: + def __init__(self): + self.eventDescription='' + self.eventTime='' + self.eventUrl='' + self.siteName='' + self.Artical='' + + def get_db_tuple(self): + return ((self.eventDescription,self.time(),self.eventUrl,self.siteName)) + + def delete_sql(self,con): + sql=""" + delete from allNews + where eventDescription='%s' and eventTime = '%s' and eventUrl ='%s' and siteName='%s' + """%(self.eventDescription,self.time(),self.eventUrl,self.siteName) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete news failed.') + else: + print('delete news successfully') + + def insert_sql(self,con): + self.delete_sql(con) + sql=''' + insert into allNews( + eventDescription,eventTime,eventUrl,siteName) + values(%s,%s,%s,%s) + ''' + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert news failed.') + else: + print('Inserted news successfully') + + + def printArtical(self): + req = requests.get(self.eventUrl) + content = req.content.decode('utf-8') + contentBs = BeautifulSoup(content, 'html.parser') + tag = contentBs.findAll('span', attrs={'class': 'bjh-p'}) + artical = '' + for item in tag: + artical=artical+(' '+item.get_text())+'\n' + return artical + + def time(self): + ts = float(self.eventTime) + localt = time.localtime(ts) # ???localtime?? + timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) # ??? + return(timestr) + + def toString(self): + print('%s\n%s\n%s\n\n%s\n%s'%(self.eventDescription,self.eventUrl,self.time(),self.printArtical(),self.siteName)) + +#爬取数据函数 +def getNews(): + req = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879') + content = req.content.decode('utf-8') + conDic = re.findall('\{.*\}',content)[0] + results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items'] + + allNews=[] + for news in results: + # print (news['eventUrl']) + inNews = InNews() + inNews.eventDescription=news['eventDescription'] + inNews.eventTime=news['eventTime'] + inNews.eventUrl=news['eventUrl'] + inNews.siteName=news['siteName'] + allNews.append(inNews) + return allNews + +def printStart(): + allNews=getNews() + for news in allNews: + news.toString() + print('************'*6) + +def mysqlStart(con): + allNews=getNews() + for news in allNews: + news.insert_sql(con) diff --git a/outsideData.py b/outsideData.py new file mode 100644 index 0000000..3ddb8f0 --- /dev/null +++ b/outsideData.py @@ -0,0 +1,142 @@ +import requests +from bs4 import BeautifulSoup +import re +import json +import time +import pymysql + +class Outside: + def __init__(self): + self.confirmed = 0 + self.died = 0 + self.crued = 0 + self.relativeTime = 0 + self.confirmedRelative = 0 + #self.asymptomatic = 0 + self.curConfirm = 0 + self.icuDisable = 0 + self.area = '' + self.subList = [] + + def __str__(self): + return '地区: %s\t累计确诊:%s \t死亡: %d \t治愈: %d \t时间: %s \t新增: %s\t 现有: %d' % (self.area,self.confirmed, self.died, self.crued, self.relativeTime, self.confirmedRelative, self.curConfirm, ) + def get_db_tuple(self): + return (self.confirmed,self.died,self.crued,self.relativeTime,self.confirmedRelative,self.curConfirm,self.icuDisable,self.area) + + def delete_sql(self,con): + cal=self.relativeTime + sql=""" + delete from outsideCountry + where area='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete outside country failed.') + else: + print('delete outside country successfully') + + + def insert_sql(self,con): + self.delete_sql(con) + sql="""insert into outsideCountry( + confirmed,died,crued,relativeTime, + confirmedRelative,curConfirm,icuDisable,area) + values(%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert outsideCountry failed.') + else: + print('Inserted outsideCountry successfully') + +class City: + def __init__(self): + self.confirmed = 0 + self.died = 0 + self.crued = 0 + self.curConfirm = 0 + self.city = '' + + def __str__(self): + return '城市:%s\t累计确诊:%d\t死亡:%d\t治愈:%d\t现有:%d'%(self.city,self.confirmed,self.died,self.crued,self.curConfirm) + +def getOrElse(target, key): + ret = '' + if target.get(key) != None: + ret = target[key] + else: + ret = '' + return ret + +def processStr(s): + ret = [] + if s==None or s == '': + ret = "0" + else: + ret = s + return ret +# 打印信息 +def print_all_infos(infos): + for p in infos: + print(p) + for c in p.subList: + print(c) + print('+++++++++++++++++++++++++++++++++' * 3) + #爬取页面 + +def getouside(): + res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia') + # res = res.content.decode('utf-8') + res=res.text + soup = BeautifulSoup(res,'html.parser') + #print(soup) + tag = soup.find('script',attrs={'id':'captain-config'}) + tagstr=tag.string + tagJson=json.loads(tagstr) + alleDataInfos = tagJson['component'] + # results = re.findall('(\{"died".*?"subList".*?\})',tagstr) + # print(len(results)) + # print(results) + outsideDataInfos = alleDataInfos[0]['caseOutsideList'] + all_outside = [] + for item in outsideDataInfos: + outside = Outside() + outside.confirmed = int(item['confirmed']) + outside.died = int(item['died']) + outside.crued = int(item['crued']) + outside.relativeTime = time.strftime("%Y-%m-%d", time.localtime( int(item['relativeTime']))) + outside.confirmedRelative = int(item['confirmedRelative']) + #self.asymptomatic = 0 + outside.curConfirm = int(item['curConfirm']) + outside.area = item['area'] + sublist = item['subList'] + for cityitem in sublist: + city = City() + city.city = getOrElse(cityitem, 'city') + city.confirmed = int(processStr(getOrElse(cityitem, 'confirmed'))) + city.died = int(processStr(getOrElse(cityitem, 'died'))) + city.crued = int(processStr(getOrElse(cityitem, 'crued'))) + city.curConfirm = int(processStr(getOrElse(cityitem, 'curConfirm'))) + outside.subList.append(city) + all_outside.append(outside) + return all_outside + +def printStart(): + all_outside=(getouside()) + print_all_infos(all_outside) + +def mysqlStart(con): + all_outside=getouside() + for c in all_outside: + c.insert_sql(con) + + + + + diff --git a/outsideSummary.py b/outsideSummary.py new file mode 100644 index 0000000..42a57fc --- /dev/null +++ b/outsideSummary.py @@ -0,0 +1,103 @@ +import requests +import re +from bs4 import BeautifulSoup +import json +import pymysql +import time + +#定义类 +class summaryDataOut: + def __init__(self,js={}): + self.confirmed=0 + self.died=0 + self.curConfirm=0 + self.cured=0 + self.confirmedRelative=0 + self.curedRelative=0 + self.diedRelative=0 + self.curConfirmRelative=0 + self.time='' + + def get_db_tuple(self): + return (self.confirmed,self.died,self.curConfirm,self.cured,self.confirmedRelative,self.curedRelative,self.diedRelative,self.curConfirmRelative,self.time) + + def delete_sql(self,con): + sql=""" + delete from outsideSummary + """ + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete outside summary failed.') + else: + print('delete outside summary successfully') + + + + def insert_sql(self,con): + self.delete_sql(con) + sql="""insert into outsideSummary(confirmed,died,curConfirm,cured, + confirmedRelative,curedRelative,diedRelative,curConfirmRelative,time) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert outsideSummary failed.') + else: + print('Inserted outsideSummary successfully') + + def blankNum (self,object): + return ' '*(12-len(str(object))) + + def PrintOut(self): + print("截至%s国外疫情概况:"%self.time) + print("现有确诊:%d" % self.curConfirm, end=self.blankNum(self.curConfirm)) + print("相对昨日新增:%d"%self.curConfirmRelative) + print("累计确诊:%d"%self.confirmed,end=self.blankNum(self.confirmed)) + print("相对昨日新增:%d"%self.confirmedRelative) + print("累计治愈:%d"%self.cured,end=self.blankNum(self.cured)) + print("相对昨日新增:%d"%self.curedRelative) + print("累计死亡:%d"%self.died,end=self.blankNum(self.died)) + print("相对昨日新增:%d"%self.diedRelative) + +def getoutsideSummary(): + # 获取数据 + req = requests.get ('https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4') + content = req.content.decode('utf-8') + soup = BeautifulSoup(content,'html.parser') + + # 过滤筛选 + tag = soup.find('script',attrs={'type':'application/json','id':'captain-config'}) + tagstr=tag.string #标签转化为字符串 + tagdic=json.loads(tagstr) #标签字符串转化为字典 + component=tagdic['component'][0] #获得疫情状况字典 + time= component['mapLastUpdatedTime'] #字典中找出时间 + result = component['summaryDataOut'] #字典中找出'component'key下的'summaryDataOut'key内容 + + # 存储 + OutData=summaryDataOut() + + OutData.confirmed=int(result['confirmed']) + OutData.confirmedRelative=int(result['confirmedRelative']) + OutData.cured=int(result['cured']) + OutData.curedRelative=int(result['curedRelative']) + OutData.died=int(result['died']) + OutData.diedRelative=int(result['diedRelative']) + OutData.curConfirm=int(result['curConfirm']) + OutData.curConfirmRelative=int(result['curConfirmRelative']) + # + OutData.time=time + return OutData + +def printStart(): + getoutsideSummary().PrintOut() + +def mysqlStart(con): + sum=getoutsideSummary() + sum.insert_sql(con) + diff --git a/printCurData.py b/printCurData.py new file mode 100644 index 0000000..7c84de4 --- /dev/null +++ b/printCurData.py @@ -0,0 +1,15 @@ +import insideData,outsideData,newsData,insideSummary,outsideSummary +print("++++++++++++++++++"+"开始打印国内疫情概况"+"++++++++++++++++++") +insideSummary.printStart() +print("\n\n\n\n\n"+"++++++++++++++++++"+"开始打印国内各省份及地级市数据"+"++++++++++++++++++") +insideData.printStart() +print("++++++++++++++++++"+"国内各省份及地级市数据打印完毕"+"++++++++++++++++++"+"\n\n\n\n\n") +print("++++++++++++++++++"+"开始打印国外疫情概况"+"++++++++++++++++++") +outsideSummary.printStart() +print("\n\n\n\n\n"+"++++++++++++++++++"+"开始打印国外各国及地区数据"+"++++++++++++++++++") +outsideData.printStart() +print("++++++++++++++++++"+"国外各国及地区数据打印完毕"+"++++++++++++++++++""\n\n\n\n\n") +print("++++++++++++++++++"+"开始打印实时新闻资讯数据"+"++++++++++++++++++") +newsData.printStart() +print("++++++++++++++++++"+"实时新闻资讯数据打印完毕"+"++++++++++++++++++""\n\n\n\n\n") +i=input("输入任意键结束") \ No newline at end of file diff --git a/国外概况class.py b/国外概况class.py deleted file mode 100644 index 9eca84b..0000000 --- a/国外概况class.py +++ /dev/null @@ -1,25 +0,0 @@ -class summaryDataOut: - confirmed=0 - died=0 - curConfirm=0 - cured=0 - confirmedRelative=0 - curedRelative=0 - diedRelative=0 - curConfirmRelative=0 - relativeTime=0 - time='' - - def blankNum (self,object): - return ' '*(12-len(str(object))) - - def PrintOut(self): - print("截至%s国外疫情概况:"%self.time) - print("现有确诊:%d" % self.curConfirm, end=self.blankNum(self.curConfirm)) - print("相对昨日新增:%d"%self.curConfirmRelative) - print("累计确诊:%d"%self.confirmed,end=self.blankNum(self.confirmed)) - print("相对昨日新增:%d"%self.confirmedRelative) - print("累计治愈:%d"%self.cured,end=self.blankNum(self.cured)) - print("相对昨日新增:%d"%self.curedRelative) - print("累计死亡:%d"%self.died,end=self.blankNum(self.died)) - print("相对昨日新增:%d"%self.diedRelative) diff --git a/国外疫情.py b/国外疫情.py deleted file mode 100644 index a7f3a18..0000000 --- a/国外疫情.py +++ /dev/null @@ -1,103 +0,0 @@ -class Outside: - def __init__(self): - self.confirmed = 0 - self.died = 0 - self.crued = 0 - self.relativeTime = 0 - self.confirmedRelative = 0 - #self.asymptomatic = 0 - self.curConfirm = 0 - self.icuDisable = 0 - self.area = '' - self.subList = [] - def __str__(self): - return '地区: %s\t累计确诊:%s \t死亡: %d \t治愈: %d \t时间: %s \t新增: %s\t 现有: %d' % (self.area, self.confirmed, self.died, self.crued, self.relativeTime, self.confirmedRelative, self.curConfirm, ) - -class City: - def __init__(self): - self.confirmed = 0 - self.died = 0 - self.crued = 0 - self.curConfirm = 0 - self.city = '' - - def __str__(self): - return '城市:%s\t累计确诊:%d\t死亡:%d\t治愈:%d\t现有:%d' % (self.city,self.confirmed,self.died,self.crued,self.curConfirm) - - - -import requests -from bs4 import BeautifulSoup -import re -import json - -def getOrElse(target, key): - ret = '' - if target.get(key) != None: - ret = target[key] - else: - ret = '' - return ret - -def processStr(s): - ret = [] - if s==None or s == '': - ret = "0" - else: - ret = s - return ret -# 打印信息 -def print_all_infos(infos): - for p in infos: - print(p) - for c in p.subList: - print(c) - print('+++++++++++++++++++++++++++++++++' * 3) - #爬取页面 -res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia') - - -# res = res.content.decode('utf-8') -res=res.text -soup = BeautifulSoup(res,'html.parser') - -#print(soup) - -tag = soup.find('script',attrs={'id':'captain-config'}) - -tagstr=tag.string - -tagJson=json.loads(tagstr) - -alleDataInfos = tagJson['component'] - -# results = re.findall('(\{"died".*?"subList".*?\})',tagstr) -# print(len(results)) -# print(results) -outsideDataInfos = alleDataInfos[0]['caseOutsideList'] -all_outside = [] - -for item in outsideDataInfos: - outside = Outside() - outside.confirmed = int(item['confirmed']) - outside.died = int(item['died']) - outside.crued = int(item['crued']) - outside.relativeTime = time.strftime("%Y-%m-%d ", time.localtime( int(item['relativeTime']))) - outside.confirmedRelative = int(item['confirmedRelative']) - #self.asymptomatic = 0 - outside.curConfirm = int(item['curConfirm']) - outside.area = item['area'] - - sublist = item['subList'] - for cityitem in sublist: - city = City() - city.city = getOrElse(cityitem, 'city') - city.confirmed = int(processStr(getOrElse(cityitem, 'confirmed'))) - city.died = int(processStr(getOrElse(cityitem, 'died'))) - city.crued = int(processStr(getOrElse(cityitem, 'crued'))) - city.curConfirm = int(processStr(getOrElse(cityitem, 'curConfirm'))) - outside.subList.append(city) - - all_outside.append(outside) - -print_all_infos(all_outside) \ No newline at end of file diff --git a/新闻class.py b/新闻class.py deleted file mode 100644 index d6d41e2..0000000 --- a/新闻class.py +++ /dev/null @@ -1,33 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup -import re -import time - - -class InNews: - def __init__(self): - self.eventDescription='' - self.eventTime='' - self.eventUrl='' - self.siteName='' - self.Artical='' - - def printArtical(self): - req = requests.get(self.eventUrl) - content = req.content.decode('utf-8') - contentBs = BeautifulSoup(content, 'html.parser') - tag = contentBs.findAll('span', attrs={'class': 'bjh-p'}) - artical = '' - for item in tag: - artical=artical+(' '+item.get_text())+'\n' - return artical - - def time(self): - ts = float(self.eventTime) - localt = time.localtime(ts) # 转换为localtime对象 - timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) # 格式化 - return(timestr) - - def toString(self): - print('%s\n原网:%s\n%s\n\n%s\n%s'%(self.eventDescription,self.eventUrl,self.time(),self.printArtical(),self.siteName)) diff --git a/爬新闻main.py b/爬新闻main.py deleted file mode 100644 index 189a733..0000000 --- a/爬新闻main.py +++ /dev/null @@ -1,24 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup -import re -from 新闻类 import InNews - -req = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879') -content = req.content.decode('utf-8') -conDic = re.findall('\{.*\}',content)[0] -results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items'] - -allNews=[] -for news in results: - # print (news['eventUrl']) - inNews = InNews() - inNews.eventDescription=news['eventDescription'] - inNews.eventTime=news['eventTime'] - inNews.eventUrl=news['eventUrl'] - inNews.siteName=news['siteName'] - allNews.append(inNews) - -for news in allNews: - news.toString() - print('************'*6) \ No newline at end of file diff --git a/爬概况main.py b/爬概况main.py deleted file mode 100644 index 9b6c46a..0000000 --- a/爬概况main.py +++ /dev/null @@ -1,34 +0,0 @@ -import requests -import re -from bs4 import BeautifulSoup -import json - -from 国外总数据 import summaryDataOut - -# 获取数据 -req = requests.get ('https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4') -content = req.content.decode('utf-8') -soup = BeautifulSoup(content,'html.parser') - -# 过滤筛选 -tag = soup.find('script',attrs={'type':'application/json','id':'captain-config'}) -tagstr=tag.string #标签转化为字符串 -tagdic=json.loads(tagstr) #标签字符串转化为字典 -component=tagdic['component'][0] #获得疫情状况字典 -time= component['mapLastUpdatedTime'] #字典中找出时间 -result = component['summaryDataOut'] #字典中找出'component'key下的'summaryDataOut'key内容 - -# 存储 -OutData=summaryDataOut() - -OutData.confirmed=int(result['confirmed']) -OutData.confirmedRelative=int(result['confirmedRelative']) -OutData.cured=int(result['cured']) -OutData.curedRelative=int(result['curedRelative']) -OutData.died=int(result['died']) -OutData.diedRelative=int(result['diedRelative']) -OutData.curConfirm=int(result['curConfirm']) -OutData.curConfirmRelative=int(result['curConfirmRelative']) -OutData.time=time - -OutData.PrintOut() \ No newline at end of file