diff --git a/CaseInland.py b/CaseInland.py deleted file mode 100644 index 5944ef1..0000000 --- a/CaseInland.py +++ /dev/null @@ -1,120 +0,0 @@ -import requests -import re -from bs4 import BeautifulSoup -import json -import time - -class City: - def __init__(self,tempjs={},city='无数据',confiemed='无数据',died='无数据',crued='无数据',confirmedRelative='无数据',cityCode='无数据'): - self.city=city - self.confirmed=confiemed - self.died=died - self.crued=crued - self.confirmedRelative=confirmedRelative - self.cityCode=cityCode - js={'city': '无数据', 'confirmed': '无数据', 'died': '无数据', 'crued': '无数据', 'confirmedRelative': '无数据', 'curConfirm': '无数据', 'cityCode': '无数据'} - for k,v in tempjs.items(): - if tempjs[k]!='': - js[k]=v - self.city=js["city"] - self.confirmed=js["confirmed"] - self.died=js["died"] - self.crued=js["crued"] - self.confirmedRelative=js["confirmedRelative"] - self.cityCode=js["cityCode"] - def __str__(self): - if self.city=='境外输入': - return "其它情况:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, - self.confirmed,self.died,self.crued) - return "城市名:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, - self.confirmed,self.died,self.crued) - -class Provinces: - def __init__(self): - self.number=0 - self.province=[] - def addProvince(self,province): - self.province.append(province) - def printEveryProvince(self): - for p in self.province: - print(p) - def __str__(self): - return str(self.province) - - -class Province: - def __init__(self,tempjs={},confirmed='无数据',died='无数据',crued='无数据',relativeTime='无数据',confirmedRelative='无数据', - diedRelative='无数据',curedRelative='无数据',asymptomaticRelative='无数据',asymptomatic='无数据', - curConfirm='无数据',curConfirmRelative='无数据',icuDisable='无数据',area='无数据',subList=[]): - self.died=died - self.confirmed=confirmed - self.crued=crued - self.relativeTime=relativeTime - self.confirmedRelative=confirmedRelative - self.diedRelative=diedRelative - self.curedRelative=curedRelative - self.asymptomaticRelative=asymptomaticRelative - self.asymptomatic=asymptomatic - self.curConfirm=curConfirm - self.curConfirmRelative=curConfirmRelative - self.icuDisable=icuDisable - self.area=area - self.subList=subList - js={'confirmed': '无数据', 'died': '无数据', 'crued': '无数据', 'relativeTime': '无数据', 'confirmedRelative': '无数据', - 'diedRelative': '无数据', 'curedRelative': '无数据', 'asymptomaticRelative': '无数据', 'asymptomatic': '无数据', 'curConfirm': '无数据', - 'curConfirmRelative': '无数据', 'icuDisable': '无数据', 'area': '无数据', 'subList':[]} - for k,v in tempjs.items(): - if tempjs[k]!='': - js[k]=v - #print(js) - self.died=js['died'] - self.confirmed=js['confirmed'] - self.crued=js['crued'] - self.relativeTime=js['relativeTime'] - self.confirmedRelative=js['confirmedRelative'] - self.diedRelative=js['diedRelative'] - self.curedRelative=js['curedRelative'] - self.asymptomaticRelative=js['asymptomaticRelative'] - self.asymptomatic=js['asymptomatic'] - self.curConfirm=js['curConfirm'] - self.curConfirmRelative=js['curConfirmRelative'] - self.icuDisable=js['icuDisable'] - self.area=js['area'] - self.subList=js['subList'] - cities=js["subList"] - for c in cities: - city=City(c) - subList.append(city) - def printEveryCity(self): - for c in self.subList: - city=City(c) - print(city) - def __str__(self): - localobj=time.localtime(int(self.relativeTime)) - newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) - return "省份:{:<4}现有患者:{:<4}现有新增患者:{:<4}累积无症状感染者:{:<4}新增无症状感染者:{:<4}累积确诊:{:<6}新增确诊:{:<4}累积治愈:{:<6}新增治愈:{:<4}累积死亡:{:<6}新增死亡:{:<4}累积重症:{:<4}更新时间:{:<10}".format(self.area,self.curConfirm,self.curConfirmRelative, - self.asymptomatic,self.asymptomaticRelative,self.confirmed,self.confirmedRelative,self.crued,self.curedRelative,self.died,self.diedRelative, - self.icuDisable,newtime) - - -def start(): - res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') - res=res.text - #script type="application/json" - soup=BeautifulSoup(res,'html.parser') - tag=soup.find('script',attrs={'id':'captain-config'}) - tagstr=str(tag) - findinland=re.findall('caseList".*"caseOutsideList',tagstr)[0] - province=re.findall('\{"confirmed.*?subList":\[.*?\]\}',findinland) - provinces=Provinces() - for item in province: - js=json.loads(item) - p=Province(js) - provinces.addProvince(p) - print(p) - p.printEveryCity() - print("++++++++++++++++++++++++++++++"*4) - - #provinces.printEveryProvince() - - diff --git a/README.txt b/README.txt deleted file mode 100644 index e69de29..0000000 diff --git a/collectData.py b/collectData.py new file mode 100644 index 0000000..313a552 --- /dev/null +++ b/collectData.py @@ -0,0 +1,49 @@ +import time,threading +#password填自己mysql的密码 +import pymysql,outsideData,newsData,insideSummary,outsideSummary,insideData +con=pymysql.connect(host='localhost',passwd='123456',charset='utf8',user='root',db='cov2019_data') +cursor=con.cursor() + +#多线程方案 +# th=[] + +# thread_insideData = threading.Thread(target=insideData.mysqlStart, args=(con, )) +# th.append(thread_insideData) + +# thread_outsideData = threading.Thread(target=outsideData.mysqlStart, args=(con, )) +# th.append(thread_outsideData) + +# thread_insideSummary = threading.Thread(target=insideSummary.mysqlStart, args=(con, )) +# th.append(thread_insideSummary) + +# thread_outsideSummary = threading.Thread(target=outsideSummary.mysqlStart, args=(con, )) +# th.append(thread_outsideSummary) + +# thread_news=threading.Thread(target=newsData.mysqlStart,args=(con,)) +# th.append(thread_news) +#多线程方案启动 +# while True: +# for t in th: +# t.run() +# for t in th: +# t.join() + +#单一线程方案 +def collect(con): + while True: + newsData.mysqlStart(con) + insideData.mysqlStart(con) + outsideData.mysqlStart(con) + insideSummary.mysqlStart(con) + outsideSummary.mysqlStart(con) + print("***********************每隔一小时自动更新数据***********************") + print("****************************此次更新完毕***************************") + updatetime=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()+3600)) + print("****************下一次启动时间: %s****************"%updatetime) + time.sleep(60*60) +#单一线程方案启动 +t=threading.Thread(target=collect,args=(con,)) +t.start() +t.join() +cursor.close() +con.close() \ No newline at end of file diff --git a/createDB.py b/createDB.py new file mode 100644 index 0000000..e3ce0e2 --- /dev/null +++ b/createDB.py @@ -0,0 +1,74 @@ +#password填自己mysql的密码 +import pymysql +con=pymysql.connect(host='localhost',port=3306,passwd='123456',charset='utf8',user='root') +cursor=con.cursor() +#建库 +sql=''' + create database if not exists cov2019_data + ''' +cursor.execute(sql) +cursor.close() +con.close() +con=pymysql.connect(host='localhost',port=3306,passwd='123456',charset='utf8',user='root',db='cov2019_data') +cursor=con.cursor() +#切换数据库 +cursor.db='cov2019_data' +#建表 +sql='''create table if not exists outsideSummary + (time char(20), + confirmed int(10), + died int(10), + curConfirm int(10), + cured int(10), + confirmedRelative int(10), + curedRelative int(10), + diedRelative int(10), + curConfirmRelative int(10), + primary key(time) + )''' +cursor.execute(sql) +sql='''create table if not exists insideSummary + (InsertTime char(20) ,mapLastUpdatedTime char(20),confirmed int(10),died int(10),cured int(10),asymptomatic int(10),asymptomaticRelative int(10), + unconfirmed int(10),relativeTime int(10),confirmedRelative int(10),unconfirmedRelative int(10), + curedRelative int(10),diedRelative int(10),icu int(10),icuRelative int(10),overseasInput int(10), + unOverseasInputCumulative int(10),overseasInputRelative int(10),unOverseasInputNewAdd int(10), + curConfirm int(10),curConfirmRelative int(10),icuDisable int(10), + primary key(InsertTime,mapLastUpdatedTime) + )''' +cursor.execute(sql) +sql='''create table if not exists insideProvince + (area char(10),relativeTime char(20),died int(10),confirmed int(10),crued int(10),confirmedRelative int(10), + diedRelative int(10),curedRelative int(10),asymptomaticRelative int(10), + asymptomatic int(10),curConfirm int(10),curConfirmRelative int(10),icuDisable int(10), + primary key(area,relativeTime) + )''' +cursor.execute(sql) +sql='''create table if not exists insideCity + (province char(10),city char(10),confirmed int(10),died int(10), + crued int(10),confirmedRelative int(10),relativeTime char(20),cityCode int(10), + primary key(city,province,relativeTime,cityCode) + )''' +cursor.execute(sql) +sql='''create table if not exists outsideCountry + ( + area char(20),relativeTime char(20), + confirmed int(10), + died int(10),crued int(10), + confirmedRelative int(10), + curConfirm int(10), + icuDisable int(10), + primary key(area,relativeTime) + ) + ''' +cursor.execute(sql) +sql='''create table if not exists allNews + (eventDescription char(200), + eventTime char(20), + eventUrl char(200), + siteName char(20), + primary key(eventDescription,eventTime,eventUrl,siteName) + ) + ''' +cursor.execute(sql) +cursor.close() +con.close() diff --git a/insideData.py b/insideData.py new file mode 100644 index 0000000..1df926e --- /dev/null +++ b/insideData.py @@ -0,0 +1,218 @@ +import requests +import re +from bs4 import BeautifulSoup +import json +import time +import pymysql + +class City: + def __init__(self,tempjs={},time='',area='',city='无数据',confirmed=0,died=0,crued=0,confirmedRelative=0,cityCode=0): + self.province=area + self.relativeTime=time + self.city=city + self.confirmed=confirmed + self.died=died + self.crued=crued + self.confirmedRelative=confirmedRelative + self.cityCode=cityCode + js={'city': '无数据', 'confirmed': 0, 'died': 0, 'crued': 0, 'confirmedRelative': 0, 'cityCode': 0} + for k,v in tempjs.items(): + if tempjs[k]!='': + js[k]=v + self.city=str(js["city"]) + self.confirmed=int(js["confirmed"]) + self.died=int(js["died"]) + self.crued=int(js["crued"]) + self.confirmedRelative=int(js["confirmedRelative"]) + self.cityCode=int(js["cityCode"]) + def get_db_tuple(self): + return (self.province,self.city,self.confirmed,self.died,self.crued,self.confirmedRelative,self.relativeTime,self.cityCode) + + def insert_sql(self,con): + sql="""insert into insideCity + (province,city,confirmed,died,crued,confirmedRelative,relativeTime,cityCode) + values(%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert insideCity failed.') + else: + print('Inserted insideCity successfully') + + def __str__(self): + if self.city=='境外输入': + return "其它情况:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, + self.confirmed,self.died,self.crued) + return "城市名:{:<10}城市代码:{:<4}新增患者:{:<4}累积确诊:{:<4}累积死亡:{:<4}累积治愈:{:<4}".format(self.city,self.cityCode,self.confirmedRelative, + self.confirmed,self.died,self.crued) + +class Provinces: + def __init__(self): + self.number=0 + self.province=[] + def addProvince(self,province): + self.province.append(province) + def printEveryProvince(self): + for p in self.province: + print(p) + def __str__(self): + return str(self.province) + + +class Province: + def __init__(self,tempjs={},confirmed=0,died=0,crued=0,relativeTime=0,confirmedRelative=0, + diedRelative=0,curedRelative=0,asymptomaticRelative=0,asymptomatic=0, + curConfirm=0,curConfirmRelative=0,icuDisable=0,area='',subList=[]): + self.died=died + self.confirmed=confirmed + self.crued=crued + self.relativeTime=relativeTime + self.confirmedRelative=confirmedRelative + self.diedRelative=diedRelative + self.curedRelative=curedRelative + self.asymptomaticRelative=asymptomaticRelative + self.asymptomatic=asymptomatic + self.curConfirm=curConfirm + self.curConfirmRelative=curConfirmRelative + self.icuDisable=icuDisable + self.area=area + self.subList=subList + js={'confirmed': 0, 'died': 0, 'crued': 0, 'relativeTime': 0, 'confirmedRelative': 0, + 'diedRelative': 0, 'curedRelative': 0, 'asymptomaticRelative': 0, 'asymptomatic': 0, 'curConfirm': 0, + 'curConfirmRelative': 0, 'icuDisable': 0, 'area': '', 'subList':[]} + for k,v in tempjs.items(): + if tempjs[k]!='': + js[k]=v + #print(js) + self.died=js['died'] + self.confirmed=js['confirmed'] + self.crued=js['crued'] + self.relativeTime=js['relativeTime'] + self.confirmedRelative=js['confirmedRelative'] + self.diedRelative=js['diedRelative'] + self.curedRelative=js['curedRelative'] + self.asymptomaticRelative=js['asymptomaticRelative'] + self.asymptomatic=js['asymptomatic'] + self.curConfirm=js['curConfirm'] + self.curConfirmRelative=js['curConfirmRelative'] + self.icuDisable=js['icuDisable'] + self.area=js['area'] + self.subList=js['subList'] + cities=js["subList"] + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + for c in cities: + city=City(c,time=newtime,area=self.area) + subList.append(city) + + def get_db_tuple(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + return (self.died,self.confirmed,self.crued,newtime,self.confirmedRelative,self.diedRelative,self.curedRelative,self.asymptomaticRelative,self.asymptomatic,self.curConfirm,self.curConfirmRelative,self.icuDisable,self.area) + + def deleteProvince_sql(self,con): + localobj=time.localtime(int(self.relativeTime)) + cal=time.strftime("%Y-%m-%d %H:%M:%S",localobj)[0:10]+r"%" + sql=""" + delete from insideProvince + where area='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete deleteProvince failed.') + else: + print('delete deleteProvince successfully') + + def deleteCity_sql(self,con): + localobj=time.localtime(int(self.relativeTime)) + cal=time.strftime("%Y-%m-%d %H:%M:%S",localobj)[0:10]+r"%" + sql=""" + delete from insideCity + where province='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print("delete %s'City failed."%self.area) + else: + print("delete %s'City successfully"%self.area) + + def insert_sql(self,con): + sql="""insert into insideProvince + (died,confirmed,crued,relativeTime,confirmedRelative, + diedRelative,curedRelative,asymptomaticRelative, + asymptomatic,curConfirm,curConfirmRelative,icuDisable,area) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + self.deleteProvince_sql(con) + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert insideProvince failed.') + else: + print('Inserted insideProvince successfully') + self.deleteCity_sql(con) + for c in self.subList: + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + city=City(c,newtime,self.area) + city.insert_sql(con) + + + def printEveryCity(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + for c in self.subList: + city=City(c,time=newtime,area=self.area) + print(city) + + def __str__(self): + localobj=time.localtime(int(self.relativeTime)) + newtime=time.strftime("%Y-%m-%d %H:%M:%S",localobj) + return "省份:{:<4}现有患者:{:<4}现有新增患者:{:<4}累积无症状感染者:{:<4}新增无症状感染者:{:<4}累积确诊:{:<6}新增确诊:{:<4}累积治愈:{:<6}新增治愈:{:<4}累积死亡:{:<6}新增死亡:{:<4}累积重症:{:<4}更新时间:{:<10}".format(self.area,self.curConfirm,self.curConfirmRelative, + self.asymptomatic,self.asymptomaticRelative,self.confirmed,self.confirmedRelative,self.crued,self.curedRelative,self.died,self.diedRelative, + self.icuDisable,newtime) + + +def getinsideData(): + res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') + res=res.text + #script type="application/json" + soup=BeautifulSoup(res,'html.parser') + tag=soup.find('script',attrs={'id':'captain-config'}) + tagstr=str(tag) + findinland=re.findall('caseList".*"caseOutsideList',tagstr)[0] + province=re.findall('\{"confirmed.*?subList":\[.*?\]\}',findinland) + provinces=Provinces() + for item in province: + js=json.loads(item) + p=Province(js) + provinces.addProvince(p) + #print(p) + #p.printEveryCity() + return provinces + +def printStart(): + provinces=getinsideData().province + for p in provinces: + print(p) + p.printEveryCity() + print("+++++++++++++++++"*6) + +def mysqlStart(con): + provinces=getinsideData().province + for p in provinces: + p.insert_sql(con) + + + \ No newline at end of file diff --git a/Di.py b/insideSummary.py similarity index 74% rename from Di.py rename to insideSummary.py index 58b5c7a..79ddf95 100644 --- a/Di.py +++ b/insideSummary.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup import lxml import json import pymysql +import time class summary_cn: def __init__(self): @@ -36,30 +37,48 @@ class summary_cn: self.unOverseasInputCumulative,self.overseasInputRelative,self.unOverseasInputNewAdd, self.curConfirm,self.curConfirmRelative,self.icuDisable,self.mapLastUpdatedTime, datetime.datetime.now().strftime('%Y-%m-%d')) - def insert_sql(con): + + def delete_sql(self,con): + sql=""" + delete from insideSummary + """ + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete inside summary failed.') + else: + print('delete inside summary successfully') + + + def insert_sql(self,con): + self.delete_sql(con) sql=''' - insert into cn_sunmary(confirmed,died,cured,asymptomatic,asymptomaticRelative, - unconfirmed,relativeTime,confirmedRelative,unconfirmedRelative, - curedRelative,diedRelative,icu,icuRelative,overseasInput, - unOverseasInputCumulative,overseasInputRelative,unOverseasInputNewAdd, - curConfirm,curConfirmRelative,icuDisable,mapLastUpdatedTime,InsertTime) values(%d,%d,%d, - %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%s) + insert into insideSummary( + confirmed,died,cured,asymptomatic,asymptomaticRelative, + unconfirmed,relativeTime,confirmedRelative,unconfirmedRelative, + curedRelative,diedRelative,icu,icuRelative,overseasInput, + unOverseasInputCumulative,overseasInputRelative,unOverseasInputNewAdd, + curConfirm,curConfirmRelative,icuDisable,mapLastUpdatedTime,InsertTime) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ''' try: con.cursor().execute(sql,self.get_db_tuple()) con.commit() except Exception as e: print(e) - print('Insert failed.') + print('Insert insideSummary failed.') else: - print('Inserted successfully') + print('Inserted insideSummary successfully') + def __str__(self): string = '截止' + self.mapLastUpdatedTime + '\n' string+= "国内现有\n\t确诊:" + str(self.curConfirm) + '\t较昨日' + str(self.curConfirmRelative) string+="\n\t无症状:" + str(self.asymptomatic) + '\t较昨日' + str(self.asymptomaticRelative) string+='\n\t重症:' + str(self.icu) + '\t较昨日' + str(self.icuRelative) string+='\n\t疑似:' + str(self.unconfirmed) + '\t\t较昨日' + str(self.unconfirmedRelative) - string+='\n累计:\n\t确诊' + str(self.confirmed) + '\n\t治愈' + str(self.cured) + string+='\n累计:\n\t确诊:' + str(self.confirmed) + '\n\t治愈:' + str(self.cured) string+='\n\t死亡' + str(self.died) return string @@ -92,27 +111,14 @@ def getCnSummary(): sum.curConfirm = int(ans['curConfirm']) sum.curConfirmRelative = int(ans['curConfirmRelative']) sum.icuDisable = int(ans['icuDisable']) + ### sum.mapLastUpdatedTime = tagjs['component'][0]['mapLastUpdatedTime'] #print(sum) return sum -def getCnNews(): - b = webdriver.Chrome() - b.get("https://voice.baidu.com/act/newpneumonia/newpneumonia") - txt = b.page_source - b.quit() - soup = BeautifulSoup(txt,'html.parser') - tag1 = soup.select('div[class="Virus_1-1-284_2CVyXP"]') - tag2 = soup.select('div[class="Virus_1-1-284_TB6x3k"] > a[href]') - ans = [] - for i,j in zip(tag1,tag2): - tmp = str(i.string) - tmp+=' ' + j.get('href') - ans.append(tmp) - print(tmp) - return ans - - -if __name__ == "__main__": - getCnSummary() +def printStart(): + print(getCnSummary()) +def mysqlStart(con): + sum=getCnSummary() + sum.insert_sql(con) diff --git a/newsData.py b/newsData.py new file mode 100644 index 0000000..8980874 --- /dev/null +++ b/newsData.py @@ -0,0 +1,97 @@ +import requests +import json +from bs4 import BeautifulSoup +import re +import time +import pymysql + +#定义新闻类 +class InNews: + def __init__(self): + self.eventDescription='' + self.eventTime='' + self.eventUrl='' + self.siteName='' + self.Artical='' + + def get_db_tuple(self): + return ((self.eventDescription,self.time(),self.eventUrl,self.siteName)) + + def delete_sql(self,con): + sql=""" + delete from allNews + where eventDescription='%s' and eventTime = '%s' and eventUrl ='%s' and siteName='%s' + """%(self.eventDescription,self.time(),self.eventUrl,self.siteName) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete news failed.') + else: + print('delete news successfully') + + def insert_sql(self,con): + self.delete_sql(con) + sql=''' + insert into allNews( + eventDescription,eventTime,eventUrl,siteName) + values(%s,%s,%s,%s) + ''' + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert news failed.') + else: + print('Inserted news successfully') + + + def printArtical(self): + req = requests.get(self.eventUrl) + content = req.content.decode('utf-8') + contentBs = BeautifulSoup(content, 'html.parser') + tag = contentBs.findAll('span', attrs={'class': 'bjh-p'}) + artical = '' + for item in tag: + artical=artical+(' '+item.get_text())+'\n' + return artical + + def time(self): + ts = float(self.eventTime) + localt = time.localtime(ts) # ???localtime?? + timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) # ??? + return(timestr) + + def toString(self): + print('%s\n%s\n%s\n\n%s\n%s'%(self.eventDescription,self.eventUrl,self.time(),self.printArtical(),self.siteName)) + +#爬取数据函数 +def getNews(): + req = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879') + content = req.content.decode('utf-8') + conDic = re.findall('\{.*\}',content)[0] + results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items'] + + allNews=[] + for news in results: + # print (news['eventUrl']) + inNews = InNews() + inNews.eventDescription=news['eventDescription'] + inNews.eventTime=news['eventTime'] + inNews.eventUrl=news['eventUrl'] + inNews.siteName=news['siteName'] + allNews.append(inNews) + return allNews + +def printStart(): + allNews=getNews() + for news in allNews: + news.toString() + print('************'*6) + +def mysqlStart(con): + allNews=getNews() + for news in allNews: + news.insert_sql(con) diff --git a/outsideData.py b/outsideData.py new file mode 100644 index 0000000..3ddb8f0 --- /dev/null +++ b/outsideData.py @@ -0,0 +1,142 @@ +import requests +from bs4 import BeautifulSoup +import re +import json +import time +import pymysql + +class Outside: + def __init__(self): + self.confirmed = 0 + self.died = 0 + self.crued = 0 + self.relativeTime = 0 + self.confirmedRelative = 0 + #self.asymptomatic = 0 + self.curConfirm = 0 + self.icuDisable = 0 + self.area = '' + self.subList = [] + + def __str__(self): + return '地区: %s\t累计确诊:%s \t死亡: %d \t治愈: %d \t时间: %s \t新增: %s\t 现有: %d' % (self.area,self.confirmed, self.died, self.crued, self.relativeTime, self.confirmedRelative, self.curConfirm, ) + def get_db_tuple(self): + return (self.confirmed,self.died,self.crued,self.relativeTime,self.confirmedRelative,self.curConfirm,self.icuDisable,self.area) + + def delete_sql(self,con): + cal=self.relativeTime + sql=""" + delete from outsideCountry + where area='%s' and relativeTime like '%s' + """%(self.area,cal) + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete outside country failed.') + else: + print('delete outside country successfully') + + + def insert_sql(self,con): + self.delete_sql(con) + sql="""insert into outsideCountry( + confirmed,died,crued,relativeTime, + confirmedRelative,curConfirm,icuDisable,area) + values(%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert outsideCountry failed.') + else: + print('Inserted outsideCountry successfully') + +class City: + def __init__(self): + self.confirmed = 0 + self.died = 0 + self.crued = 0 + self.curConfirm = 0 + self.city = '' + + def __str__(self): + return '城市:%s\t累计确诊:%d\t死亡:%d\t治愈:%d\t现有:%d'%(self.city,self.confirmed,self.died,self.crued,self.curConfirm) + +def getOrElse(target, key): + ret = '' + if target.get(key) != None: + ret = target[key] + else: + ret = '' + return ret + +def processStr(s): + ret = [] + if s==None or s == '': + ret = "0" + else: + ret = s + return ret +# 打印信息 +def print_all_infos(infos): + for p in infos: + print(p) + for c in p.subList: + print(c) + print('+++++++++++++++++++++++++++++++++' * 3) + #爬取页面 + +def getouside(): + res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia') + # res = res.content.decode('utf-8') + res=res.text + soup = BeautifulSoup(res,'html.parser') + #print(soup) + tag = soup.find('script',attrs={'id':'captain-config'}) + tagstr=tag.string + tagJson=json.loads(tagstr) + alleDataInfos = tagJson['component'] + # results = re.findall('(\{"died".*?"subList".*?\})',tagstr) + # print(len(results)) + # print(results) + outsideDataInfos = alleDataInfos[0]['caseOutsideList'] + all_outside = [] + for item in outsideDataInfos: + outside = Outside() + outside.confirmed = int(item['confirmed']) + outside.died = int(item['died']) + outside.crued = int(item['crued']) + outside.relativeTime = time.strftime("%Y-%m-%d", time.localtime( int(item['relativeTime']))) + outside.confirmedRelative = int(item['confirmedRelative']) + #self.asymptomatic = 0 + outside.curConfirm = int(item['curConfirm']) + outside.area = item['area'] + sublist = item['subList'] + for cityitem in sublist: + city = City() + city.city = getOrElse(cityitem, 'city') + city.confirmed = int(processStr(getOrElse(cityitem, 'confirmed'))) + city.died = int(processStr(getOrElse(cityitem, 'died'))) + city.crued = int(processStr(getOrElse(cityitem, 'crued'))) + city.curConfirm = int(processStr(getOrElse(cityitem, 'curConfirm'))) + outside.subList.append(city) + all_outside.append(outside) + return all_outside + +def printStart(): + all_outside=(getouside()) + print_all_infos(all_outside) + +def mysqlStart(con): + all_outside=getouside() + for c in all_outside: + c.insert_sql(con) + + + + + diff --git a/outsideSummary.py b/outsideSummary.py new file mode 100644 index 0000000..42a57fc --- /dev/null +++ b/outsideSummary.py @@ -0,0 +1,103 @@ +import requests +import re +from bs4 import BeautifulSoup +import json +import pymysql +import time + +#定义类 +class summaryDataOut: + def __init__(self,js={}): + self.confirmed=0 + self.died=0 + self.curConfirm=0 + self.cured=0 + self.confirmedRelative=0 + self.curedRelative=0 + self.diedRelative=0 + self.curConfirmRelative=0 + self.time='' + + def get_db_tuple(self): + return (self.confirmed,self.died,self.curConfirm,self.cured,self.confirmedRelative,self.curedRelative,self.diedRelative,self.curConfirmRelative,self.time) + + def delete_sql(self,con): + sql=""" + delete from outsideSummary + """ + try: + cursor=con.cursor() + cursor.execute(sql) + except Exception as e: + print(e) + print('delete outside summary failed.') + else: + print('delete outside summary successfully') + + + + def insert_sql(self,con): + self.delete_sql(con) + sql="""insert into outsideSummary(confirmed,died,curConfirm,cured, + confirmedRelative,curedRelative,diedRelative,curConfirmRelative,time) + values(%s,%s,%s,%s,%s,%s,%s,%s,%s) + """ + try: + con.cursor().execute(sql,self.get_db_tuple()) + con.commit() + except Exception as e: + print(e) + print('Insert outsideSummary failed.') + else: + print('Inserted outsideSummary successfully') + + def blankNum (self,object): + return ' '*(12-len(str(object))) + + def PrintOut(self): + print("截至%s国外疫情概况:"%self.time) + print("现有确诊:%d" % self.curConfirm, end=self.blankNum(self.curConfirm)) + print("相对昨日新增:%d"%self.curConfirmRelative) + print("累计确诊:%d"%self.confirmed,end=self.blankNum(self.confirmed)) + print("相对昨日新增:%d"%self.confirmedRelative) + print("累计治愈:%d"%self.cured,end=self.blankNum(self.cured)) + print("相对昨日新增:%d"%self.curedRelative) + print("累计死亡:%d"%self.died,end=self.blankNum(self.died)) + print("相对昨日新增:%d"%self.diedRelative) + +def getoutsideSummary(): + # 获取数据 + req = requests.get ('https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4') + content = req.content.decode('utf-8') + soup = BeautifulSoup(content,'html.parser') + + # 过滤筛选 + tag = soup.find('script',attrs={'type':'application/json','id':'captain-config'}) + tagstr=tag.string #标签转化为字符串 + tagdic=json.loads(tagstr) #标签字符串转化为字典 + component=tagdic['component'][0] #获得疫情状况字典 + time= component['mapLastUpdatedTime'] #字典中找出时间 + result = component['summaryDataOut'] #字典中找出'component'key下的'summaryDataOut'key内容 + + # 存储 + OutData=summaryDataOut() + + OutData.confirmed=int(result['confirmed']) + OutData.confirmedRelative=int(result['confirmedRelative']) + OutData.cured=int(result['cured']) + OutData.curedRelative=int(result['curedRelative']) + OutData.died=int(result['died']) + OutData.diedRelative=int(result['diedRelative']) + OutData.curConfirm=int(result['curConfirm']) + OutData.curConfirmRelative=int(result['curConfirmRelative']) + # + OutData.time=time + return OutData + +def printStart(): + getoutsideSummary().PrintOut() + +def mysqlStart(con): + sum=getoutsideSummary() + sum.insert_sql(con) + diff --git a/printCurData.py b/printCurData.py new file mode 100644 index 0000000..7c84de4 --- /dev/null +++ b/printCurData.py @@ -0,0 +1,15 @@ +import insideData,outsideData,newsData,insideSummary,outsideSummary +print("++++++++++++++++++"+"开始打印国内疫情概况"+"++++++++++++++++++") +insideSummary.printStart() +print("\n\n\n\n\n"+"++++++++++++++++++"+"开始打印国内各省份及地级市数据"+"++++++++++++++++++") +insideData.printStart() +print("++++++++++++++++++"+"国内各省份及地级市数据打印完毕"+"++++++++++++++++++"+"\n\n\n\n\n") +print("++++++++++++++++++"+"开始打印国外疫情概况"+"++++++++++++++++++") +outsideSummary.printStart() +print("\n\n\n\n\n"+"++++++++++++++++++"+"开始打印国外各国及地区数据"+"++++++++++++++++++") +outsideData.printStart() +print("++++++++++++++++++"+"国外各国及地区数据打印完毕"+"++++++++++++++++++""\n\n\n\n\n") +print("++++++++++++++++++"+"开始打印实时新闻资讯数据"+"++++++++++++++++++") +newsData.printStart() +print("++++++++++++++++++"+"实时新闻资讯数据打印完毕"+"++++++++++++++++++""\n\n\n\n\n") +i=input("输入任意键结束") \ No newline at end of file diff --git a/国外概况class.py b/国外概况class.py deleted file mode 100644 index 9eca84b..0000000 --- a/国外概况class.py +++ /dev/null @@ -1,25 +0,0 @@ -class summaryDataOut: - confirmed=0 - died=0 - curConfirm=0 - cured=0 - confirmedRelative=0 - curedRelative=0 - diedRelative=0 - curConfirmRelative=0 - relativeTime=0 - time='' - - def blankNum (self,object): - return ' '*(12-len(str(object))) - - def PrintOut(self): - print("截至%s国外疫情概况:"%self.time) - print("现有确诊:%d" % self.curConfirm, end=self.blankNum(self.curConfirm)) - print("相对昨日新增:%d"%self.curConfirmRelative) - print("累计确诊:%d"%self.confirmed,end=self.blankNum(self.confirmed)) - print("相对昨日新增:%d"%self.confirmedRelative) - print("累计治愈:%d"%self.cured,end=self.blankNum(self.cured)) - print("相对昨日新增:%d"%self.curedRelative) - print("累计死亡:%d"%self.died,end=self.blankNum(self.died)) - print("相对昨日新增:%d"%self.diedRelative) diff --git a/国外疫情.py b/国外疫情.py deleted file mode 100644 index a7f3a18..0000000 --- a/国外疫情.py +++ /dev/null @@ -1,103 +0,0 @@ -class Outside: - def __init__(self): - self.confirmed = 0 - self.died = 0 - self.crued = 0 - self.relativeTime = 0 - self.confirmedRelative = 0 - #self.asymptomatic = 0 - self.curConfirm = 0 - self.icuDisable = 0 - self.area = '' - self.subList = [] - def __str__(self): - return '地区: %s\t累计确诊:%s \t死亡: %d \t治愈: %d \t时间: %s \t新增: %s\t 现有: %d' % (self.area, self.confirmed, self.died, self.crued, self.relativeTime, self.confirmedRelative, self.curConfirm, ) - -class City: - def __init__(self): - self.confirmed = 0 - self.died = 0 - self.crued = 0 - self.curConfirm = 0 - self.city = '' - - def __str__(self): - return '城市:%s\t累计确诊:%d\t死亡:%d\t治愈:%d\t现有:%d' % (self.city,self.confirmed,self.died,self.crued,self.curConfirm) - - - -import requests -from bs4 import BeautifulSoup -import re -import json - -def getOrElse(target, key): - ret = '' - if target.get(key) != None: - ret = target[key] - else: - ret = '' - return ret - -def processStr(s): - ret = [] - if s==None or s == '': - ret = "0" - else: - ret = s - return ret -# 打印信息 -def print_all_infos(infos): - for p in infos: - print(p) - for c in p.subList: - print(c) - print('+++++++++++++++++++++++++++++++++' * 3) - #爬取页面 -res=requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia') - - -# res = res.content.decode('utf-8') -res=res.text -soup = BeautifulSoup(res,'html.parser') - -#print(soup) - -tag = soup.find('script',attrs={'id':'captain-config'}) - -tagstr=tag.string - -tagJson=json.loads(tagstr) - -alleDataInfos = tagJson['component'] - -# results = re.findall('(\{"died".*?"subList".*?\})',tagstr) -# print(len(results)) -# print(results) -outsideDataInfos = alleDataInfos[0]['caseOutsideList'] -all_outside = [] - -for item in outsideDataInfos: - outside = Outside() - outside.confirmed = int(item['confirmed']) - outside.died = int(item['died']) - outside.crued = int(item['crued']) - outside.relativeTime = time.strftime("%Y-%m-%d ", time.localtime( int(item['relativeTime']))) - outside.confirmedRelative = int(item['confirmedRelative']) - #self.asymptomatic = 0 - outside.curConfirm = int(item['curConfirm']) - outside.area = item['area'] - - sublist = item['subList'] - for cityitem in sublist: - city = City() - city.city = getOrElse(cityitem, 'city') - city.confirmed = int(processStr(getOrElse(cityitem, 'confirmed'))) - city.died = int(processStr(getOrElse(cityitem, 'died'))) - city.crued = int(processStr(getOrElse(cityitem, 'crued'))) - city.curConfirm = int(processStr(getOrElse(cityitem, 'curConfirm'))) - outside.subList.append(city) - - all_outside.append(outside) - -print_all_infos(all_outside) \ No newline at end of file diff --git a/新闻class.py b/新闻class.py deleted file mode 100644 index d6d41e2..0000000 --- a/新闻class.py +++ /dev/null @@ -1,33 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup -import re -import time - - -class InNews: - def __init__(self): - self.eventDescription='' - self.eventTime='' - self.eventUrl='' - self.siteName='' - self.Artical='' - - def printArtical(self): - req = requests.get(self.eventUrl) - content = req.content.decode('utf-8') - contentBs = BeautifulSoup(content, 'html.parser') - tag = contentBs.findAll('span', attrs={'class': 'bjh-p'}) - artical = '' - for item in tag: - artical=artical+(' '+item.get_text())+'\n' - return artical - - def time(self): - ts = float(self.eventTime) - localt = time.localtime(ts) # 转换为localtime对象 - timestr = time.strftime("%Y-%m-%d %H:%M:%S", localt) # 格式化 - return(timestr) - - def toString(self): - print('%s\n原网:%s\n%s\n\n%s\n%s'%(self.eventDescription,self.eventUrl,self.time(),self.printArtical(),self.siteName)) diff --git a/爬新闻main.py b/爬新闻main.py deleted file mode 100644 index 189a733..0000000 --- a/爬新闻main.py +++ /dev/null @@ -1,24 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup -import re -from 新闻类 import InNews - -req = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879') -content = req.content.decode('utf-8') -conDic = re.findall('\{.*\}',content)[0] -results = json.loads(conDic)['Result'][0]['DisplayData']['result']['items'] - -allNews=[] -for news in results: - # print (news['eventUrl']) - inNews = InNews() - inNews.eventDescription=news['eventDescription'] - inNews.eventTime=news['eventTime'] - inNews.eventUrl=news['eventUrl'] - inNews.siteName=news['siteName'] - allNews.append(inNews) - -for news in allNews: - news.toString() - print('************'*6) \ No newline at end of file diff --git a/爬概况main.py b/爬概况main.py deleted file mode 100644 index 9b6c46a..0000000 --- a/爬概况main.py +++ /dev/null @@ -1,34 +0,0 @@ -import requests -import re -from bs4 import BeautifulSoup -import json - -from 国外总数据 import summaryDataOut - -# 获取数据 -req = requests.get ('https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4') -content = req.content.decode('utf-8') -soup = BeautifulSoup(content,'html.parser') - -# 过滤筛选 -tag = soup.find('script',attrs={'type':'application/json','id':'captain-config'}) -tagstr=tag.string #标签转化为字符串 -tagdic=json.loads(tagstr) #标签字符串转化为字典 -component=tagdic['component'][0] #获得疫情状况字典 -time= component['mapLastUpdatedTime'] #字典中找出时间 -result = component['summaryDataOut'] #字典中找出'component'key下的'summaryDataOut'key内容 - -# 存储 -OutData=summaryDataOut() - -OutData.confirmed=int(result['confirmed']) -OutData.confirmedRelative=int(result['confirmedRelative']) -OutData.cured=int(result['cured']) -OutData.curedRelative=int(result['curedRelative']) -OutData.died=int(result['died']) -OutData.diedRelative=int(result['diedRelative']) -OutData.curConfirm=int(result['curConfirm']) -OutData.curConfirmRelative=int(result['curConfirmRelative']) -OutData.time=time - -OutData.PrintOut() \ No newline at end of file