From 81a00d68af8ea54374a1d9e670acdc8d355f78a5 Mon Sep 17 00:00:00 2001
From: egg23333 <806261011@qq.com>
Date: Thu, 13 Aug 2020 23:31:59 +0800
Subject: [PATCH] merge caigy

---
Review note: this copy was rebuilt from a whitespace-collapsed version of
the mail, so indentation and any runs of spaces inside string literals are
reconstructed. The file bodies were also revised during review, so the
blob ids on the "index" lines no longer match the contents.

 国外概况class.py | 33 +++++++++++++++++++++++++++++++++
 新闻class.py | 39 +++++++++++++++++++++++++++++++++++++++
 爬新闻main.py | 31 +++++++++++++++++++++++++++++++
 爬概况main.py | 37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 140 insertions(+)
 create mode 100644 国外概况class.py
 create mode 100644 新闻class.py
 create mode 100644 爬新闻main.py
 create mode 100644 爬概况main.py

diff --git a/国外概况class.py b/国外概况class.py
new file mode 100644
index 0000000..9eca84b
--- /dev/null
+++ b/国外概况class.py
@@ -0,0 +1,33 @@
+class summaryDataOut:
+    """Overseas epidemic summary figures plus a console report.
+
+    Every figure defaults to 0 and ``time`` to an empty string;
+    ``PrintOut`` reads whatever has been assigned on the instance.
+    """
+
+    confirmed = 0
+    died = 0
+    curConfirm = 0
+    cured = 0
+    confirmedRelative = 0
+    curedRelative = 0
+    diedRelative = 0
+    curConfirmRelative = 0
+    relativeTime = 0
+    time = ''
+
+    def blankNum(self, object):
+        """Return ``12 - len(str(object))`` spaces (none if not positive)."""
+        return ' ' * (12 - len(str(object)))
+
+    def PrintOut(self):
+        """Print the report title, then each total beside its daily change."""
+        print("截至%s国外疫情概况:" % self.time)
+        print("现有确诊:%d" % self.curConfirm, end=self.blankNum(self.curConfirm))
+        print("相对昨日新增:%d" % self.curConfirmRelative)
+        print("累计确诊:%d" % self.confirmed, end=self.blankNum(self.confirmed))
+        print("相对昨日新增:%d" % self.confirmedRelative)
+        print("累计治愈:%d" % self.cured, end=self.blankNum(self.cured))
+        print("相对昨日新增:%d" % self.curedRelative)
+        print("累计死亡:%d" % self.died, end=self.blankNum(self.died))
+        print("相对昨日新增:%d" % self.diedRelative)
diff --git a/新闻class.py b/新闻class.py
new file mode 100644
index 0000000..d6d41e2
--- /dev/null
+++ b/新闻class.py
@@ -0,0 +1,39 @@
+import requests
+import json
+from bs4 import BeautifulSoup
+import re
+import time
+
+
+class InNews:
+    """One news item plus helpers to fetch and print its article text."""
+
+    def __init__(self):
+        self.eventDescription = ''
+        self.eventTime = ''
+        self.eventUrl = ''
+        self.siteName = ''
+        self.Artical = ''
+
+    def printArtical(self):
+        """Download ``eventUrl`` and return the text of its ``span.bjh-p`` tags.
+
+        Each span becomes one line, prefixed with a space.
+        """
+        # Without a timeout a stalled server would block the crawl forever.
+        req = requests.get(self.eventUrl, timeout=10)
+        content = req.content.decode('utf-8')
+        contentBs = BeautifulSoup(content, 'html.parser')
+        tags = contentBs.findAll('span', attrs={'class': 'bjh-p'})
+        return ''.join(' ' + item.get_text() + '\n' for item in tags)
+
+    def time(self):
+        """Format ``eventTime`` (epoch seconds) as a local-time string."""
+        localt = time.localtime(float(self.eventTime))
+        return time.strftime("%Y-%m-%d %H:%M:%S", localt)
+
+    def toString(self):
+        """Print the description, source URL, time, article text and site name."""
+        print('%s\n原网:%s\n%s\n\n%s\n%s' % (
+            self.eventDescription, self.eventUrl, self.time(),
+            self.printArtical(), self.siteName))
diff --git a/爬新闻main.py b/爬新闻main.py
new file mode 100644
index 0000000..189a733
--- /dev/null
+++ b/爬新闻main.py
@@ -0,0 +1,31 @@
+import requests
+import json
+from bs4 import BeautifulSoup
+import re
+# NOTE(review): this patch adds the class in 新闻class.py; confirm that a
+# module named 新闻类 exists, otherwise this import fails.
+from 新闻类 import InNews
+
+NEWS_URL = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879'
+
+# The URL requests a JSONP callback (cb=...), so cut the JSON object out of
+# the response before parsing; the timeout avoids hanging on a stalled server.
+req = requests.get(NEWS_URL, timeout=10)
+content = req.content.decode('utf-8')
+match = re.search(r'\{.*\}', content)
+if match is None:
+    raise ValueError('no JSON object found in the news response')
+results = json.loads(match.group(0))['Result'][0]['DisplayData']['result']['items']
+
+allNews = []
+for news in results:
+    inNews = InNews()
+    inNews.eventDescription = news['eventDescription']
+    inNews.eventTime = news['eventTime']
+    inNews.eventUrl = news['eventUrl']
+    inNews.siteName = news['siteName']
+    allNews.append(inNews)
+
+for news in allNews:
+    news.toString()
+    print('************' * 6)
diff --git a/爬概况main.py b/爬概况main.py
new file mode 100644
index 0000000..9b6c46a
--- /dev/null
+++ b/爬概况main.py
@@ -0,0 +1,37 @@
+import requests
+import re
+from bs4 import BeautifulSoup
+import json
+
+# NOTE(review): this patch adds the class in 国外概况class.py; confirm that
+# a module named 国外总数据 exists, otherwise this import fails.
+from 国外总数据 import summaryDataOut
+
+SUMMARY_URL = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_3#tab4'
+
+# Fetch the page; the timeout keeps a stalled server from hanging the run.
+req = requests.get(SUMMARY_URL, timeout=10)
+content = req.content.decode('utf-8')
+soup = BeautifulSoup(content, 'html.parser')
+
+# The figures are read from the JSON script tag with id "captain-config".
+tag = soup.find('script', attrs={'type': 'application/json', 'id': 'captain-config'})
+if tag is None:
+    raise ValueError('captain-config script tag not found in the page')
+tagdic = json.loads(tag.string)  # tag text -> dict
+component = tagdic['component'][0]  # first entry under 'component'
+time = component['mapLastUpdatedTime']  # last-update time from the page data
+result = component['summaryDataOut']  # the 'summaryDataOut' figures
+
+# Copy the figures onto the report object, then print it.
+OutData = summaryDataOut()
+for field in (
+    'confirmed', 'confirmedRelative',
+    'cured', 'curedRelative',
+    'died', 'diedRelative',
+    'curConfirm', 'curConfirmRelative',
+):
+    setattr(OutData, field, int(result[field]))
+OutData.time = time
+
+OutData.PrintOut()