From f882423518a165d8e06911765ed452c4504aa873 Mon Sep 17 00:00:00 2001 From: unknown <919497931@qq.com> Date: Sun, 16 Aug 2020 23:52:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E5=8F=96=E5=AE=9E=E6=97=B6=E6=96=B0?= =?UTF-8?q?=E9=97=BB=E6=95=B0=E6=8D=AE=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 提取实时新闻数据.html | 13802 +++++++++++++++++++++++++++++++ 提取实时新闻数据.ipynb | 669 ++ 2 files changed, 14471 insertions(+) create mode 100644 提取实时新闻数据.html create mode 100644 提取实时新闻数据.ipynb diff --git a/提取实时新闻数据.html b/提取实时新闻数据.html new file mode 100644 index 0000000..cd88483 --- /dev/null +++ b/提取实时新闻数据.html @@ -0,0 +1,13802 @@ + + + + +20200813学生版 + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+

获取国内疫情实时滚动新闻播报数据

+
+
+
+
+
+
In [33]:
+
+
+
import requests
+from bs4 import BeautifulSoup
+import re 
+import json 
+# 定义实时国内新闻类
+class insideNews: 
+    def __init__(self): 
+        insidenews.eventTime = ''
+        self.eventDescription = ''  
+        self.siteName = '' 
+        self.eventUrl = '' 
+        self.homepageUrl = ''
+        self.item_avatar = ''
+        
+    def __str__(self): 
+        return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)
+
+    
+response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group') #发送get请求,获得目标服务器相应 
+html = response.content.decode('unicode-escape')# 解码 
+soup = BeautifulSoup(html)# 构建soup对象 
+tag = soup.find('p')# 查找指定标签
+tagstr = tag.string# 转换成字符串
+results = re.findall(r'(\{"bjh_na".*?"eventDescription":.*?\})', tagstr)# 使用正则表达式查找所有的实时新闻
+
+all_insidenews = [] 
+for item in results:
+    insidenews = insideNews() 
+    print(item)
+    itemjson = json.loads(item)      
+    insidenews.eventTime = itemjson['eventTime']
+    insidenews.eventDescription = itemjson['eventDescription'] 
+    insidenews.siteName = itemjson['siteName']   
+    insidenews.eventUrl = itemjson['eventUrl'] 
+    insidenews.homepageUrl = itemjson['homepageUrl'] 
+    insidenews.item_avatar = itemjson['item_avatar'] 
+    
+    all_insidenews.append(insidenews) 
+    
+for insidenews in all_insidenews: 
+    print(insidenews) 
+    print('+++++' * 10)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10193162063076338121","thread_id":"1125000033495772","type":"news"},"eventDescription":"北京连续6日零新增","eventTime":"1597278720","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674868131526596687","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9729390377660864175","thread_id":"1062000033493672","type":"news"},"eventDescription":"国家卫健委:昨日新增确诊病例19例 本土病例8例","eventTime":"1597278262","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674867651765148223","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1551968238585112","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0","siteName":"环球时报"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9766033389288531073","thread_id":"1035000033486933","type":"news"},"eventDescription":"新疆(含兵团)新增新冠肺炎确诊病例8例 均在乌鲁木齐市","eventTime":"1597277648","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674867007109286633","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9382499071856256847","thread_id":"1027000033493454","type":"news"},"eventDescription":"上海昨日无新增本地新冠肺炎确诊病例,新增境外输入2例","eventTime":"1597276086","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674865718710262411","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9691932825381303415","thread_id":"1001000033461176","type":"news"},"eventDescription":"一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治","eventTime":"1597235700","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674823034158688355","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1566453612428800","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/9da74a517eb1befeba93a5f3167cc74b.jpeg","siteName":"新京报"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9964760177505552343","thread_id":"1011000033456808","type":"news"},"eventDescription":"香港新增62例确诊病例 连续10天少于100例","eventTime":"1597221655","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674808294135893351","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1552864910655429","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/d2f919e031c47d671a5748b5aeafe096.jpeg","siteName":"人民日报海外网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9288792493272135235","thread_id":"1059000033444318","type":"news"},"eventDescription":"安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性","eventTime":"1597217665","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674804105747106594","homepageUrl":"http:\/\/baijiahao.baidu.com\/u?app_id=1570168240515616","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/0026e9a191787761348be081d428a354.jpeg","siteName":"央视新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10446745964617569625","thread_id":"1051000033431472","type":"news"},"eventDescription":"国家卫健委:昨日新增确诊25例,其中本土病例9例(均在新疆)","eventTime":"1597193291","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674772359564935639","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10156202482208840230","thread_id":"1113000033424506","type":"news"},"eventDescription":"北京连续5天无新增确诊病例","eventTime":"1597192410","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674777629341250609","homepageUrl":"http:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/73c41abfdd401cc72e04d64b3ce0103d.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10445989328771260362","thread_id":"1038000033429039","type":"news"},"eventDescription":"广东新增境外输入确诊病例6例:广州报告1例、珠海5例","eventTime":"1597192142","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674777366221789944","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10225492811636488031","thread_id":"1005000033424867","type":"news"},"eventDescription":"新疆报告新增新冠肺炎确诊病例9例,新增无症状感染者8例","eventTime":"1597191705","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674776890002663976","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10687708877099983703","thread_id":"1122000033425687","type":"news"},"eventDescription":"上海新增境外输入4例,已追踪同航班密接者106人","eventTime":"1597190188","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674775298876647449","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9974943368943181587","thread_id":"1088000033375829","type":"news"},"eventDescription":"新疆昨日新增确诊13例,新增无症状感染者11例,均在乌鲁木齐市","eventTime":"1597105476","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674686471966488007","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9619452370238472138","thread_id":"1007000033365024","type":"news"},"eventDescription":"昨天上海新增8例境外输入病例:均为中国籍,在阿联酋工作","eventTime":"1597103162","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674684045559410432","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9507350360885120833","thread_id":"1063000033345790","type":"news"},"eventDescription":"辽宁省新增2例境外输入确诊病例,分别来自美国和菲律宾","eventTime":"1597057109","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674636151291490440","homepageUrl":"http:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/timg01.bdimg.com\/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10041585112433645986","thread_id":"1009000033336073","type":"news"},"eventDescription":"香港新增69例确诊病例 连续8天少于100例","eventTime":"1597047600","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674626913670664857","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1552864910655429","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/d2f919e031c47d671a5748b5aeafe096.jpeg","siteName":"人民日报海外网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9094183316011565694","thread_id":"1067000033325987","type":"news"},"eventDescription":"韩国境外输入性新冠病例中检出3例病毒变异","eventTime":"1597045994","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674624304511109399","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1565176801499628","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/8963c44fe9bb2de078a778f71a32f6eb.jpeg","siteName":"界面新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10678170579397374006","thread_id":"1091000033323462","type":"news"},"eventDescription":"新疆新增新冠确诊病例14例,无症状感染者7例,均在乌鲁木齐市","eventTime":"1597019819","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674596654587658867","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10594182283454069402","thread_id":"1117000033305820","type":"news"},"eventDescription":"北京连续三日零新增!","eventTime":"1597019546","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674596367795749792","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9838778629077913962","thread_id":"1022000033302920","type":"news"},"eventDescription":"广东新增境外输入3例,入境后即被隔离观察","eventTime":"1597018528","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674595300038757825","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9564560403354013811","thread_id":"1018000033303382","type":"news"},"eventDescription":"上海昨日无新增本地新冠肺炎确诊病例,新增境外输入18例","eventTime":"1597016945","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674593640123663447","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10189864705033416741","thread_id":"1051000033311081","type":"news"},"eventDescription":"重庆昨日新增无症状感染者1例,为新加坡输入","eventTime":"1597015727","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674592612999522909","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9484762286110882343","thread_id":"1031000033318117","type":"news"},"eventDescription":"一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情","eventTime":"1597013216","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674590351892834223","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1552864910655429","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/d2f919e031c47d671a5748b5aeafe096.jpeg","siteName":"人民日报海外网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10031387837680691121","thread_id":"1047000033289493","type":"news"},"eventDescription":"香港新增72例新冠确诊病例,连续7日少于100例","eventTime":"1596962023","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674536151578789920","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1565176801499628","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/8963c44fe9bb2de078a778f71a32f6eb.jpeg","siteName":"界面新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"8870194084231278988","thread_id":"1032000033266987","type":"news"},"eventDescription":"新疆新增确诊病例15例,均在乌鲁木齐","eventTime":"1596933298","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674505943705180172","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1566453612428800","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/9da74a517eb1befeba93a5f3167cc74b.jpeg","siteName":"新京报"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9364628799509405044","thread_id":"1122000033252294","type":"news"},"eventDescription":"31省区市新增确诊病例23例,其中本土病例15例(均在新疆)","eventTime":"1596930618","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674503120087258676","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1589908044211516","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/ec0438088a2c4589fb541fbc12b448f0.jpeg","siteName":"健康中国"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9965173851761410585","thread_id":"1002000033232184","type":"news"},"eventDescription":"香港新增69例新冠确诊病例,累计超过4000例","eventTime":"1596875762","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674445865981756049","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1565176801499628","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/8963c44fe9bb2de078a778f71a32f6eb.jpeg","siteName":"界面新闻"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10269292514474689323","thread_id":"1005000033194943","type":"news"},"eventDescription":"截至8月7日24时新型冠状病毒肺炎疫情最新情况","eventTime":"1596848726","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674417127284939659","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1589908044211516","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/ec0438088a2c4589fb541fbc12b448f0.jpeg","siteName":"健康中国"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9285567187511393140","thread_id":"1087000033199166","type":"news"},"eventDescription":"北京8月7日无新增报告新冠肺炎确诊病例","eventTime":"1596846935","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674415443327545029","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1636203306265037","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0","siteName":"青瞳视角"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9141704573616219966","thread_id":"1055000033207982","type":"news"},"eventDescription":"新疆新增确诊病例25例、无症状感染者8例","eventTime":"1596846320","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674414780683115272","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1566453612428800","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/9da74a517eb1befeba93a5f3167cc74b.jpeg","siteName":"新京报"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"8385504052586009526","thread_id":"1049000033197879","type":"news"},"eventDescription":"辽宁连续两日无新增本土确诊病例 新增治愈出院3例","eventTime":"1596844828","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674413542605070060","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1549608413453462","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39","siteName":"环球网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"10340470323184094852","thread_id":"1006000033182361","type":"news"},"eventDescription":"新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了","eventTime":"1596812732","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674379507756801699","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9212218199886086839","thread_id":"1073000033176131","type":"news"},"eventDescription":"对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知","eventTime":"1596800212","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674366411525373651","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1566453612428800","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/9da74a517eb1befeba93a5f3167cc74b.jpeg","siteName":"新京报"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9522441535993403492","thread_id":"1112000033153621","type":"news"},"eventDescription":"林郑月娥:港府将免费为全港市民进行新冠病毒检测","eventTime":"1596785041","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674350469425444439","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1552864910655429","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/d2f919e031c47d671a5748b5aeafe096.jpeg","siteName":"人民日报海外网"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"8873266033734356254","thread_id":"1040000033143344","type":"news"},"eventDescription":"北京新增大连疫情关联病例1例,新发地疫情病例清零","eventTime":"1596760416","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674324650362213371","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1601149438053974","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/814a110b34d52c106f9ba99c5d415478.jpeg","siteName":"北京日报客户端"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9716904130394627526","thread_id":"1027000033142735","type":"news"},"eventDescription":"31省市区新增确诊37例,其中本土病例27例(新疆26例 北京1例)","eventTime":"1596760201","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674324373876029599","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1589908044211516","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/ec0438088a2c4589fb541fbc12b448f0.jpeg","siteName":"健康中国"}
+{"bjh_na":{"easyBrowse":"1","easyBrowseConfirm":"1","nid":"9549964897517921046","thread_id":"1033000033139415","type":"news"},"eventDescription":"新疆(含兵团)新增确诊病例26例,新增无症状感染者10例","eventTime":"1596759965","eventUrl":"http:\/\/baijiahao.baidu.com\/s?id=1674324177292404207","homepageUrl":"https:\/\/baijiahao.baidu.com\/u?app_id=1561825967470087","item_avatar":"http:\/\/pic.rmb.bdstatic.com\/bjh\/user\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg","siteName":"红星新闻"}
+eventTime: , eventDescription: 北京连续6日零新增, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674868131526596687, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 国家卫健委:昨日新增确诊病例19例 本土病例8例, siteName: 环球时报, eventUrl: http://baijiahao.baidu.com/s?id=1674867651765148223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1551968238585112, item_avatar: http://pic.rmb.bdstatic.com/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆(含兵团)新增新冠肺炎确诊病例8例 均在乌鲁木齐市, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674867007109286633, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例,新增境外输入2例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674865718710262411, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674823034158688355, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 香港新增62例确诊病例 连续10天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674808294135893351, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性, siteName: 央视新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674804105747106594, homepageUrl: http://baijiahao.baidu.com/u?app_id=1570168240515616, item_avatar: http://pic.rmb.bdstatic.com/0026e9a191787761348be081d428a354.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 国家卫健委:昨日新增确诊25例,其中本土病例9例(均在新疆), siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674772359564935639, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 北京连续5天无新增确诊病例, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674777629341250609, homepageUrl: http://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/73c41abfdd401cc72e04d64b3ce0103d.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 广东新增境外输入确诊病例6例:广州报告1例、珠海5例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674777366221789944, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆报告新增新冠肺炎确诊病例9例,新增无症状感染者8例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674776890002663976, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 上海新增境外输入4例,已追踪同航班密接者106人, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674775298876647449, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆昨日新增确诊13例,新增无症状感染者11例,均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674686471966488007, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 昨天上海新增8例境外输入病例:均为中国籍,在阿联酋工作, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674684045559410432, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 辽宁省新增2例境外输入确诊病例,分别来自美国和菲律宾, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674636151291490440, homepageUrl: http://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://timg01.bdimg.com/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 香港新增69例确诊病例 连续8天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674626913670664857, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 韩国境外输入性新冠病例中检出3例病毒变异, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674624304511109399, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆新增新冠确诊病例14例,无症状感染者7例,均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674596654587658867, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 北京连续三日零新增!, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674596367795749792, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 广东新增境外输入3例,入境后即被隔离观察, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674595300038757825, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例,新增境外输入18例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674593640123663447, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 重庆昨日新增无症状感染者1例,为新加坡输入, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674592612999522909, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 香港新增72例新冠确诊病例,连续7日少于100例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536151578789920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆新增确诊病例15例,均在乌鲁木齐, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674505943705180172, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 31省区市新增确诊病例23例,其中本土病例15例(均在新疆), siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674503120087258676, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 香港新增69例新冠确诊病例,累计超过4000例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674445865981756049, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 截至8月7日24时新型冠状病毒肺炎疫情最新情况, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674417127284939659, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 北京8月7日无新增报告新冠肺炎确诊病例, siteName: 青瞳视角, eventUrl: http://baijiahao.baidu.com/s?id=1674415443327545029, homepageUrl: https://baijiahao.baidu.com/u?app_id=1636203306265037, item_avatar: http://pic.rmb.bdstatic.com/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆新增确诊病例25例、无症状感染者8例, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674414780683115272, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 辽宁连续两日无新增本土确诊病例 新增治愈出院3例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674413542605070060, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674379507756801699, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674366411525373651, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 林郑月娥:港府将免费为全港市民进行新冠病毒检测, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674350469425444439, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 北京新增大连疫情关联病例1例,新发地疫情病例清零, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674324650362213371, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: , eventDescription: 31省市区新增确诊37例,其中本土病例27例(新疆26例 北京1例), siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674324373876029599, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596759965, eventDescription: 新疆(含兵团)新增确诊病例26例,新增无症状感染者10例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674324177292404207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+ +
+ +
+ + +
+
E:\anaconda3\lib\site-packages\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\/'
+
+
+
+ +
+
+ +
+
+
+
+

获取国外疫情实时滚动新闻播报数据

+
+
+
+
+
+
In [26]:
+
+
+
import requests
+from bs4 import BeautifulSoup
+import re 
+import json 
+# 定义实时国外新闻类
+class outsideNews: 
+    def __init__(self): 
+        insidenews.eventTime = ''
+        self.eventDescription = ''  
+        self.siteName = '' 
+        self.eventUrl = '' 
+        self.homepageUrl = ''
+        self.item_avatar = ''
+        
+    def __str__(self): 
+        return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)
+
+    
+response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group') #发送get请求,获得目标服务器相应 
+html = response.content.decode('unicode-escape')# 解码 
+soup = BeautifulSoup(html)# 构建soup对象 
+tag = soup.find('p')# 查找指定标签
+tagstr = tag.string# 转换成字符串
+results = re.findall(r'(\{"bjh_na".*?"eventDescription":.*?\})', tagstr)# 使用正则表达式查找所有的实时新闻
+
+all_outsidenews = [] 
+for item in results:
+    outsidenews = outsideNews() 
+    itemjson = json.loads(item)      
+    outsidenews.eventTime = itemjson['eventTime']
+    outsidenews.eventDescription = itemjson['eventDescription'] 
+    outsidenews.siteName = itemjson['siteName']   
+    outsidenews.eventUrl = itemjson['eventUrl'] 
+    outsidenews.homepageUrl = itemjson['homepageUrl'] 
+    outsidenews.item_avatar = itemjson['item_avatar'] 
+    
+    all_outsidenews.append(outsidenews) 
+    
+for outsidenews in all_outsidenews: 
+    print(outsidenews) 
+    print('+++++' * 10)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
eventTime: 1597275291, eventDescription: 日增5.5万,巴西新冠肺炎确诊病例超316万例, siteName: 人民日报, eventUrl: http://baijiahao.baidu.com/s?id=1674864544431254391, homepageUrl: https://baijiahao.baidu.com/u?app_id=1593743208952652, item_avatar: http://pic.rmb.bdstatic.com/53864a5e77b735000396c815760f7b4c.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597273097, eventDescription: 一文读懂全球疫情:全球累计确诊超2076万例 美国现另一场公共卫生危机, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674862818784287730, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597272142, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.8万例 累计超535万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674861714204253861, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597217570, eventDescription: 「战疫全时区」俄罗斯新增5102例确诊病例 累计超90万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674804011129533783, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597187332, eventDescription: 美国确诊超513万,死亡超16.4万例,超38万儿童感染, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674772303894901158, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597186109, eventDescription: 一文读懂全球疫情:全球累计确诊超2048万例 俄罗斯注册首款新冠疫苗, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674771802796395216, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597108712, eventDescription: 全球2000万+!确诊数前三的国家,疫情形势怎么样了?, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674689865386982063, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597099802, eventDescription: 一文读懂全球疫情:全球累计确诊超2021万例 巴西90万人在疫情期间成功戒烟, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674681138323257988, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597098897, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.1万例 累计超524万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674680199047536719, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597098540, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.2万例 累计超305万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674679503309357561, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597044840, eventDescription: 「战疫全时区」俄罗斯新增5118例确诊病例 累计超89万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674623132178961292, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597041465, eventDescription: 返校前夕,全美近10万学生两周内感染新冠肺炎, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674620237736618467, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597013216, eventDescription: 一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597012823, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4万例 累计超519万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589542624222920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1597012711, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.3万例 累计超303万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589406966244222, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596961970, eventDescription: 首批哈萨克斯坦华人回国 隔离14天后可回家 亲历者:提前12小时就到机场, siteName: 封面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536083513406606, homepageUrl: https://baijiahao.baidu.com/u?app_id=1577667706397024, item_avatar: http://pic.rmb.bdstatic.com/7d15a50cee51b0a96e00a85cfabbb513.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596960850, eventDescription: 日本连续5天新增逾千例 安倍:极力避免再次进入紧急状态, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674535368426252580, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596960494, eventDescription: 美国新冠确诊病例接近500万例, siteName: 新华社客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674534439138090155, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552507899985619, item_avatar: http://pic.rmb.bdstatic.com/f873c8976c274e5f9bacc77a5c1b3e89.jpeg@c_1,w_385,h_385,x_6,y_11
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596953765, eventDescription: 印度第3位部长级官员确诊感染新冠肺炎 身兼多份国务要职, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674527459855975038, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596942761, eventDescription: 印度安德拉邦一新冠肺炎征用酒店起火 已致4人死亡, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674515947367591821, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596931253, eventDescription: 警惕!日本研究发现变异新冠病毒,已向全国各地扩散, siteName: 北晚新视觉网, eventUrl: http://baijiahao.baidu.com/s?id=1674503882559884129, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549941228125394, item_avatar: http://pic.rmb.bdstatic.com/b9279adf974b78d27201a0b34970c2a9.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596927420, eventDescription: 一文读懂全球疫情:全球累计确诊超1977万例 印度连续10天日增病例超5万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674500137134331393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596926700, eventDescription: 「战疫全时区」美国单日新增确诊病例超6.1万例 累计逾514万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674499173102689654, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596926340, eventDescription: 「战疫全时区」巴西累计确诊病例超301万例 死亡逾10万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674498936636405454, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596871924, eventDescription: 「战疫全时区」俄罗斯新增5212例确诊病例 累计确诊超88万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674442809541789098, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596860880, eventDescription: 「战疫全时区」印度日增超6.1万例 累计确诊超208万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674430048790588388, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596840442, eventDescription: 一文读懂全球疫情:全球累计确诊近1950万例 印度累计确诊超过200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408972047141586, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596839886, eventDescription: 「战疫全时区」巴西单日新增确诊病例超4.4万例 累计逾296万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408470860851393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596839434, eventDescription: 「战疫全时区」美国单日新增确诊病例超6万例 累计逾508万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674407841000149898, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596817404, eventDescription: 20天!印度新冠确诊病例从100万增至200万, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674384907923543549, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596801592, eventDescription: 8月7日全球疫情观察:至少22国日增确诊超千例 印度累计确诊逾200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674368083759315686, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596786000, eventDescription: 「战疫全时区」俄罗斯新增5241例确诊病例 累计确诊超87万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674351790374377698, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596783247, eventDescription: 白俄罗斯总统卢卡申科:有人故意将新冠病毒传染给我, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674348658542718182, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596769135, eventDescription: 印度成全球第三个确诊破200万的国家,21天新增100万, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674333884198078936, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596760866, eventDescription: 世卫组织:已有六种新冠疫苗进入三期临床试验阶段 其中三种来自中国, siteName: 中国日报网, eventUrl: http://baijiahao.baidu.com/s?id=1674325335422513595, homepageUrl: https://baijiahao.baidu.com/u?app_id=1567805706555546, item_avatar: http://pic.rmb.bdstatic.com/8f27cc4a0abf470446a58a0066b710f5.png
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596753951, eventDescription: 一文读懂全球疫情:全球累计确诊近1920万例 美国一州长为特朗普接机前确诊, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318314838065202, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596753846, eventDescription: 「战疫全时区」巴西单日新增确诊病例超5.3万例 累计逾291万例, siteName: 人民资讯, eventUrl: http://baijiahao.baidu.com/s?id=1674317758867433207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1669728810290752, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/867084918f0beae5baa88b1fcbc34f1f.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596753788, eventDescription: 「战疫全时区」美国单日新增确诊病例超5.9万例 累计逾502万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318066095349625, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596723779, eventDescription: 今日疫情汇总:黎巴嫩爆炸恐加剧疫情传播,朝鲜首例疑似检测“没结果”, siteName: 纵相新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674286560061876250, homepageUrl: https://baijiahao.baidu.com/u?app_id=1607773795161133, item_avatar: http://pic.rmb.bdstatic.com/4221a08a04ad0d69f15b15252297b8e2.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+eventTime: 1596708377, eventDescription: 全球新冠肺炎确诊病例超1900万例 死亡病例逾71.1万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674270432508007457, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg
+++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+ +
+ +
+ + +
+
E:\anaconda3\lib\site-packages\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\/'
+
+
+
+ +
+
+ +
+
+
+
+

将获取到的国内外实时新闻数据导入数据库中

+
+
+
+
+
+
+

数据库实体类

+
+
+
+
+
+
In [45]:
+
+
+
import pymysql
+
+class MyDB:
+    def __init__(self, host, user, passwd, db):
+        self.conn = pymysql.connect(host, user, passwd, db)
+        self.cursor = self.conn.cursor()
+        
+    def get_insideNews_list_tuple(self, all_insideNews):
+        info_tuple = []
+        for item in all_insideNews:
+            info_tuple.append(item.get_info_tuple())
+        return info_tuple
+    
+    # 保存国内新闻数据
+    def save_insideNews_datas(self, all_insideNews):
+        sql = 'insert into insideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'
+        res = self.get_insideNews_list_tuple(all_insideNews)
+        
+        print('+++ save_insideNews_datas, data len: %d' % len(res))
+        try:
+            self.cursor.executemany(sql, res)#批处理插入函数,res为元组类型,其中还是元组类型
+            self.conn.commit()
+        except Exception as e:
+            print(e)
+        print('+++ save_insideNews_datas is over.')
+        
+        
+    
+    def get_outsideNews_list_tuple(self, all_outsideNews):
+            info_tuple = []
+            for item in all_outsideNews:
+                info_tuple.append(item.get_info_tuple())
+            return info_tuple
+  
+    #保存国外新闻数据
+    def save_outsideNews_datas(self, all_outsideNews):
+        sql = 'insert into outsideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'
+        res = self.get_outsideNews_list_tuple(all_outsideNews)
+        
+        print('+++ save_outsideNews_datas, data len: %d' % len(res))
+        try:
+            self.cursor.executemany(sql, res)#批处理插入函数,res为元组类型,其中还是元组类型
+            self.conn.commit()
+        except Exception as e:
+            print(e)
+        print('+++ save_outsideNews_datas is over.')
+      
+    def __del__(self):
+        if self.conn is not None:
+            self.conn.close()
+
+ +
+
+
+ +
+
+
+
+

国内新闻

+
+
+
+
+
+
In [43]:
+
+
+
import requests
+import re
+from bs4 import BeautifulSoup
+import json
+ 
+
+# 定义实时国内新闻类
+class InsideNews: 
+    def __init__(self): 
+        self.nid = ''
+        self.thread_id = ''
+        self.eventTime = '' 
+        self.eventDescription = ''  
+        self.siteName = '' 
+        self.eventUrl = '' 
+        self.homepageUrl = ''
+        self.item_avatar = ''
+        
+    def __str__(self): 
+        return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)
+
+    def get_info_tuple(self):
+        return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))
+    
+class DataService:
+    def __init__(self):
+        self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group'
+        self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')
+        
+    #抓取网页
+    def fetch_html_page(self):
+        res = requests.get(self.url)#发送get请求,获得目标服务器响应
+        res = res.content.decode('unicode-escape')#解码
+        return res
+    
+    #解析网页
+    def parse_target_page(self, html):
+        soup = BeautifulSoup(html)#构建soup对象
+        tag = soup.find('p')#查找指定标签
+        tagStr = tag.string#转换为字符串
+        
+        #使用正则表达式查找所有内容,结果返回为list类型(即获取国内实时新闻数据)
+        self.insideNews = re.findall(r'(\{"bjh_na".*?"eventDescription":.*?\})',tagStr)
+       
+    def fetch_insideNews_datas(self):
+        all_insideNews = []
+        for item in self.insideNews:
+            insideNews = InsideNews() 
+            itemjson = json.loads(item)      
+            insideNews.eventTime = itemjson['eventTime']
+            insideNews.eventDescription = itemjson['eventDescription'] 
+            insideNews.siteName = itemjson['siteName']   
+            insideNews.eventUrl = itemjson['eventUrl'] 
+            insideNews.homepageUrl = itemjson['homepageUrl'] 
+            insideNews.item_avatar = itemjson['item_avatar'] 
+            
+            all_insideNews.append(insideNews)
+        return all_insideNews
+    
+    #提取内容生成对象
+    def fetch_page_datas(self):
+        all_insideNews = self.fetch_insideNews_datas()
+        
+       # for item in all_insideNews:
+         #   print(item)
+         #   print("+++++"*10)
+        
+        return all_insideNews
+        
+    #业务函数
+    def process_data(self):
+        html = self.fetch_html_page()
+        self.parse_target_page(html)
+        all_insideNews = self.fetch_page_datas()
+        
+        #保存国内实时播报新闻数据
+        self.db.save_insideNews_datas(all_insideNews)
+        
+#创建DataService对象
+ds = DataService()
+ds.process_data()
+
+#print(len(ds.insideNews))
+#print(type(ds.insideNews[0]))
+#print(ds.insideNews[0])
+#print()
+#print(ds.outsideNews)
+#print()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
E:\anaconda3\lib\site-packages\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\/'
+
+
+
+ +
+ +
+ + +
+
+++ save_insideNews_datas, data len: 37
++++ save_insideNews_datas is over.
+
+
+
+ +
+
+ +
+
+
+
+

国外新闻

+
+
+
+
+
+
In [46]:
+
+
+
import requests
+import re
+from bs4 import BeautifulSoup
+import json
+ 
+
+# 定义实时国内新闻类
+class OutsideNews: 
+    def __init__(self): 
+        self.nid = ''
+        self.thread_id = ''
+        self.eventTime = '' 
+        self.eventDescription = ''  
+        self.siteName = '' 
+        self.eventUrl = '' 
+        self.homepageUrl = ''
+        self.item_avatar = ''
+        
+    def __str__(self): 
+        return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)
+
+    def get_info_tuple(self):
+        return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))
+    
+class DataService:
+    def __init__(self):
+        self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group'
+        self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')
+        
+    #抓取网页
+    def fetch_html_page(self):
+        res = requests.get(self.url)#发送get请求,获得目标服务器响应
+        res = res.content.decode('unicode-escape')#解码
+        return res
+    
+    #解析网页
+    def parse_target_page(self, html):
+        soup = BeautifulSoup(html)#构建soup对象
+        tag = soup.find('p')#查找指定标签
+        tagStr = tag.string#转换为字符串
+        
+        #使用正则表达式查找所有内容,结果返回为list类型(即获取国内实时新闻数据)
+        self.outsideNews = re.findall(r'(\{"bjh_na".*?"eventDescription":.*?\})',tagStr)
+       
+    def fetch_outsideNews_datas(self):
+        all_outsideNews = []
+        for item in self.outsideNews:
+            outsideNews = OutsideNews() 
+            itemjson = json.loads(item)      
+            outsideNews.eventTime = itemjson['eventTime']
+            outsideNews.eventDescription = itemjson['eventDescription'] 
+            outsideNews.siteName = itemjson['siteName']   
+            outsideNews.eventUrl = itemjson['eventUrl'] 
+            outsideNews.homepageUrl = itemjson['homepageUrl'] 
+            outsideNews.item_avatar = itemjson['item_avatar'] 
+            
+            all_outsideNews.append(outsideNews)
+        return all_outsideNews
+    
+    #提取内容生成对象
+    def fetch_page_datas(self):
+        all_outsideNews = self.fetch_outsideNews_datas()
+        
+       # for item in all_insideNews:
+         #   print(item)
+         #   print("+++++"*10)
+        
+        return all_outsideNews
+        
+    #业务函数
+    def process_data(self):
+        html = self.fetch_html_page()
+        self.parse_target_page(html)
+        all_outsideNews = self.fetch_page_datas()
+        
+        #保存国内实时播报新闻数据
+        self.db.save_outsideNews_datas(all_outsideNews)
+        
+#创建DataService对象
+ds = DataService()
+ds.process_data()
+
+#print(len(ds.insideNews))
+#print(type(ds.insideNews[0]))
+#print(ds.insideNews[0])
+#print()
+#print(ds.outsideNews)
+#print()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
E:\anaconda3\lib\site-packages\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\/'
+
+
+
+ +
+ +
+ + +
+
+++ save_outsideNews_datas, data len: 40
++++ save_outsideNews_datas is over.
+
+
+
+ +
+
+ +
+
+
+ + + + + + diff --git a/提取实时新闻数据.ipynb b/提取实时新闻数据.ipynb new file mode 100644 index 0000000..f95f488 --- /dev/null +++ b/提取实时新闻数据.ipynb @@ -0,0 +1,669 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 获取国内疫情实时滚动新闻播报数据" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10193162063076338121\",\"thread_id\":\"1125000033495772\",\"type\":\"news\"},\"eventDescription\":\"北京连续6日零新增\",\"eventTime\":\"1597278720\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674868131526596687\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9729390377660864175\",\"thread_id\":\"1062000033493672\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委:昨日新增确诊病例19例 本土病例8例\",\"eventTime\":\"1597278262\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867651765148223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1551968238585112\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\",\"siteName\":\"环球时报\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9766033389288531073\",\"thread_id\":\"1035000033486933\",\"type\":\"news\"},\"eventDescription\":\"新疆(含兵团)新增新冠肺炎确诊病例8例 均在乌鲁木齐市\",\"eventTime\":\"1597277648\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867007109286633\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9382499071856256847\",\"thread_id\":\"1027000033493454\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例,新增境外输入2例\",\"eventTime\":\"1597276086\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674865718710262411\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9691932825381303415\",\"thread_id\":\"1001000033461176\",\"type\":\"news\"},\"eventDescription\":\"一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治\",\"eventTime\":\"1597235700\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674823034158688355\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9964760177505552343\",\"thread_id\":\"1011000033456808\",\"type\":\"news\"},\"eventDescription\":\"香港新增62例确诊病例 连续10天少于100例\",\"eventTime\":\"1597221655\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674808294135893351\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9288792493272135235\",\"thread_id\":\"1059000033444318\",\"type\":\"news\"},\"eventDescription\":\"安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性\",\"eventTime\":\"1597217665\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674804105747106594\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1570168240515616\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/0026e9a191787761348be081d428a354.jpeg\",\"siteName\":\"央视新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10446745964617569625\",\"thread_id\":\"1051000033431472\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委:昨日新增确诊25例,其中本土病例9例(均在新疆)\",\"eventTime\":\"1597193291\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674772359564935639\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10156202482208840230\",\"thread_id\":\"1113000033424506\",\"type\":\"news\"},\"eventDescription\":\"北京连续5天无新增确诊病例\",\"eventTime\":\"1597192410\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777629341250609\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/73c41abfdd401cc72e04d64b3ce0103d.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10445989328771260362\",\"thread_id\":\"1038000033429039\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入确诊病例6例:广州报告1例、珠海5例\",\"eventTime\":\"1597192142\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777366221789944\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10225492811636488031\",\"thread_id\":\"1005000033424867\",\"type\":\"news\"},\"eventDescription\":\"新疆报告新增新冠肺炎确诊病例9例,新增无症状感染者8例\",\"eventTime\":\"1597191705\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674776890002663976\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10687708877099983703\",\"thread_id\":\"1122000033425687\",\"type\":\"news\"},\"eventDescription\":\"上海新增境外输入4例,已追踪同航班密接者106人\",\"eventTime\":\"1597190188\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674775298876647449\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9974943368943181587\",\"thread_id\":\"1088000033375829\",\"type\":\"news\"},\"eventDescription\":\"新疆昨日新增确诊13例,新增无症状感染者11例,均在乌鲁木齐市\",\"eventTime\":\"1597105476\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674686471966488007\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9619452370238472138\",\"thread_id\":\"1007000033365024\",\"type\":\"news\"},\"eventDescription\":\"昨天上海新增8例境外输入病例:均为中国籍,在阿联酋工作\",\"eventTime\":\"1597103162\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674684045559410432\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9507350360885120833\",\"thread_id\":\"1063000033345790\",\"type\":\"news\"},\"eventDescription\":\"辽宁省新增2例境外输入确诊病例,分别来自美国和菲律宾\",\"eventTime\":\"1597057109\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674636151291490440\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/timg01.bdimg.com\\/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10041585112433645986\",\"thread_id\":\"1009000033336073\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例确诊病例 连续8天少于100例\",\"eventTime\":\"1597047600\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674626913670664857\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9094183316011565694\",\"thread_id\":\"1067000033325987\",\"type\":\"news\"},\"eventDescription\":\"韩国境外输入性新冠病例中检出3例病毒变异\",\"eventTime\":\"1597045994\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674624304511109399\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10678170579397374006\",\"thread_id\":\"1091000033323462\",\"type\":\"news\"},\"eventDescription\":\"新疆新增新冠确诊病例14例,无症状感染者7例,均在乌鲁木齐市\",\"eventTime\":\"1597019819\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596654587658867\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10594182283454069402\",\"thread_id\":\"1117000033305820\",\"type\":\"news\"},\"eventDescription\":\"北京连续三日零新增!\",\"eventTime\":\"1597019546\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596367795749792\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9838778629077913962\",\"thread_id\":\"1022000033302920\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入3例,入境后即被隔离观察\",\"eventTime\":\"1597018528\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674595300038757825\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9564560403354013811\",\"thread_id\":\"1018000033303382\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例,新增境外输入18例\",\"eventTime\":\"1597016945\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674593640123663447\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10189864705033416741\",\"thread_id\":\"1051000033311081\",\"type\":\"news\"},\"eventDescription\":\"重庆昨日新增无症状感染者1例,为新加坡输入\",\"eventTime\":\"1597015727\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674592612999522909\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9484762286110882343\",\"thread_id\":\"1031000033318117\",\"type\":\"news\"},\"eventDescription\":\"一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情\",\"eventTime\":\"1597013216\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674590351892834223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10031387837680691121\",\"thread_id\":\"1047000033289493\",\"type\":\"news\"},\"eventDescription\":\"香港新增72例新冠确诊病例,连续7日少于100例\",\"eventTime\":\"1596962023\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674536151578789920\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8870194084231278988\",\"thread_id\":\"1032000033266987\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例15例,均在乌鲁木齐\",\"eventTime\":\"1596933298\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674505943705180172\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9364628799509405044\",\"thread_id\":\"1122000033252294\",\"type\":\"news\"},\"eventDescription\":\"31省区市新增确诊病例23例,其中本土病例15例(均在新疆)\",\"eventTime\":\"1596930618\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674503120087258676\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9965173851761410585\",\"thread_id\":\"1002000033232184\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例新冠确诊病例,累计超过4000例\",\"eventTime\":\"1596875762\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674445865981756049\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10269292514474689323\",\"thread_id\":\"1005000033194943\",\"type\":\"news\"},\"eventDescription\":\"截至8月7日24时新型冠状病毒肺炎疫情最新情况\",\"eventTime\":\"1596848726\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674417127284939659\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9285567187511393140\",\"thread_id\":\"1087000033199166\",\"type\":\"news\"},\"eventDescription\":\"北京8月7日无新增报告新冠肺炎确诊病例\",\"eventTime\":\"1596846935\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674415443327545029\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1636203306265037\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\",\"siteName\":\"青瞳视角\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9141704573616219966\",\"thread_id\":\"1055000033207982\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例25例、无症状感染者8例\",\"eventTime\":\"1596846320\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674414780683115272\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8385504052586009526\",\"thread_id\":\"1049000033197879\",\"type\":\"news\"},\"eventDescription\":\"辽宁连续两日无新增本土确诊病例 新增治愈出院3例\",\"eventTime\":\"1596844828\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674413542605070060\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10340470323184094852\",\"thread_id\":\"1006000033182361\",\"type\":\"news\"},\"eventDescription\":\"新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了\",\"eventTime\":\"1596812732\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674379507756801699\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9212218199886086839\",\"thread_id\":\"1073000033176131\",\"type\":\"news\"},\"eventDescription\":\"对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知\",\"eventTime\":\"1596800212\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674366411525373651\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9522441535993403492\",\"thread_id\":\"1112000033153621\",\"type\":\"news\"},\"eventDescription\":\"林郑月娥:港府将免费为全港市民进行新冠病毒检测\",\"eventTime\":\"1596785041\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674350469425444439\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8873266033734356254\",\"thread_id\":\"1040000033143344\",\"type\":\"news\"},\"eventDescription\":\"北京新增大连疫情关联病例1例,新发地疫情病例清零\",\"eventTime\":\"1596760416\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324650362213371\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9716904130394627526\",\"thread_id\":\"1027000033142735\",\"type\":\"news\"},\"eventDescription\":\"31省市区新增确诊37例,其中本土病例27例(新疆26例 北京1例)\",\"eventTime\":\"1596760201\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324373876029599\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n", + "{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9549964897517921046\",\"thread_id\":\"1033000033139415\",\"type\":\"news\"},\"eventDescription\":\"新疆(含兵团)新增确诊病例26例,新增无症状感染者10例\",\"eventTime\":\"1596759965\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324177292404207\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n", + "eventTime: , eventDescription: 北京连续6日零新增, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674868131526596687, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 国家卫健委:昨日新增确诊病例19例 本土病例8例, siteName: 环球时报, eventUrl: http://baijiahao.baidu.com/s?id=1674867651765148223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1551968238585112, item_avatar: http://pic.rmb.bdstatic.com/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆(含兵团)新增新冠肺炎确诊病例8例 均在乌鲁木齐市, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674867007109286633, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例,新增境外输入2例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674865718710262411, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674823034158688355, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 香港新增62例确诊病例 连续10天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674808294135893351, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性, siteName: 央视新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674804105747106594, homepageUrl: http://baijiahao.baidu.com/u?app_id=1570168240515616, item_avatar: http://pic.rmb.bdstatic.com/0026e9a191787761348be081d428a354.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 国家卫健委:昨日新增确诊25例,其中本土病例9例(均在新疆), siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674772359564935639, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 北京连续5天无新增确诊病例, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674777629341250609, homepageUrl: http://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/73c41abfdd401cc72e04d64b3ce0103d.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 广东新增境外输入确诊病例6例:广州报告1例、珠海5例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674777366221789944, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆报告新增新冠肺炎确诊病例9例,新增无症状感染者8例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674776890002663976, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 上海新增境外输入4例,已追踪同航班密接者106人, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674775298876647449, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆昨日新增确诊13例,新增无症状感染者11例,均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674686471966488007, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 昨天上海新增8例境外输入病例:均为中国籍,在阿联酋工作, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674684045559410432, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 辽宁省新增2例境外输入确诊病例,分别来自美国和菲律宾, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674636151291490440, homepageUrl: http://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://timg01.bdimg.com/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 香港新增69例确诊病例 连续8天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674626913670664857, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 韩国境外输入性新冠病例中检出3例病毒变异, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674624304511109399, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆新增新冠确诊病例14例,无症状感染者7例,均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674596654587658867, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 北京连续三日零新增!, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674596367795749792, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 广东新增境外输入3例,入境后即被隔离观察, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674595300038757825, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例,新增境外输入18例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674593640123663447, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 重庆昨日新增无症状感染者1例,为新加坡输入, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674592612999522909, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 香港新增72例新冠确诊病例,连续7日少于100例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536151578789920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆新增确诊病例15例,均在乌鲁木齐, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674505943705180172, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 31省区市新增确诊病例23例,其中本土病例15例(均在新疆), siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674503120087258676, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 香港新增69例新冠确诊病例,累计超过4000例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674445865981756049, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 截至8月7日24时新型冠状病毒肺炎疫情最新情况, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674417127284939659, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 北京8月7日无新增报告新冠肺炎确诊病例, siteName: 青瞳视角, eventUrl: http://baijiahao.baidu.com/s?id=1674415443327545029, homepageUrl: https://baijiahao.baidu.com/u?app_id=1636203306265037, item_avatar: http://pic.rmb.bdstatic.com/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆新增确诊病例25例、无症状感染者8例, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674414780683115272, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 辽宁连续两日无新增本土确诊病例 新增治愈出院3例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674413542605070060, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674379507756801699, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674366411525373651, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 林郑月娥:港府将免费为全港市民进行新冠病毒检测, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674350469425444439, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 北京新增大连疫情关联病例1例,新发地疫情病例清零, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674324650362213371, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: , eventDescription: 31省市区新增确诊37例,其中本土病例27例(新疆26例 北京1例), siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674324373876029599, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596759965, eventDescription: 新疆(含兵团)新增确诊病例26例,新增无症状感染者10例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674324177292404207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n" + ] + } + ], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import re \n", + "import json \n", + "# 定义实时国内新闻类\n", + "class insideNews: \n", + " def __init__(self): \n", + " insidenews.eventTime = ''\n", + " self.eventDescription = '' \n", + " self.siteName = '' \n", + " self.eventUrl = '' \n", + " self.homepageUrl = ''\n", + " self.item_avatar = ''\n", + " \n", + " def __str__(self): \n", + " return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n", + "\n", + " \n", + "response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group') #发送get请求,获得目标服务器相应 \n", + "html = response.content.decode('unicode-escape')# 解码 \n", + "soup = BeautifulSoup(html)# 构建soup对象 \n", + "tag = soup.find('p')# 查找指定标签\n", + "tagstr = tag.string# 转换成字符串\n", + "results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', tagstr)# 使用正则表达式查找所有的实时新闻\n", + "\n", + "all_insidenews = [] \n", + "for item in results:\n", + " insidenews = insideNews() \n", + " print(item)\n", + " itemjson = json.loads(item) \n", + " insidenews.eventTime = itemjson['eventTime']\n", + " insidenews.eventDescription = itemjson['eventDescription'] \n", + " insidenews.siteName = itemjson['siteName'] \n", + " insidenews.eventUrl = itemjson['eventUrl'] \n", + " insidenews.homepageUrl = itemjson['homepageUrl'] \n", + " insidenews.item_avatar = itemjson['item_avatar'] \n", + " \n", + " all_insidenews.append(insidenews) \n", + " \n", + "for insidenews in all_insidenews: \n", + " print(insidenews) \n", + " print('+++++' * 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 获取国外疫情实时滚动新闻播报数据" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "eventTime: 1597275291, eventDescription: 日增5.5万,巴西新冠肺炎确诊病例超316万例, siteName: 人民日报, eventUrl: http://baijiahao.baidu.com/s?id=1674864544431254391, homepageUrl: https://baijiahao.baidu.com/u?app_id=1593743208952652, item_avatar: http://pic.rmb.bdstatic.com/53864a5e77b735000396c815760f7b4c.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597273097, eventDescription: 一文读懂全球疫情:全球累计确诊超2076万例 美国现另一场公共卫生危机, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674862818784287730, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597272142, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.8万例 累计超535万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674861714204253861, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597217570, eventDescription: 「战疫全时区」俄罗斯新增5102例确诊病例 累计超90万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674804011129533783, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597187332, eventDescription: 美国确诊超513万,死亡超16.4万例,超38万儿童感染, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674772303894901158, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597186109, eventDescription: 一文读懂全球疫情:全球累计确诊超2048万例 俄罗斯注册首款新冠疫苗, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674771802796395216, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597108712, eventDescription: 全球2000万+!确诊数前三的国家,疫情形势怎么样了?, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674689865386982063, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597099802, eventDescription: 一文读懂全球疫情:全球累计确诊超2021万例 巴西90万人在疫情期间成功戒烟, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674681138323257988, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597098897, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.1万例 累计超524万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674680199047536719, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597098540, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.2万例 累计超305万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674679503309357561, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597044840, eventDescription: 「战疫全时区」俄罗斯新增5118例确诊病例 累计超89万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674623132178961292, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597041465, eventDescription: 返校前夕,全美近10万学生两周内感染新冠肺炎, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674620237736618467, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597013216, eventDescription: 一文读懂全球疫情:全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597012823, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4万例 累计超519万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589542624222920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1597012711, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.3万例 累计超303万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589406966244222, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596961970, eventDescription: 首批哈萨克斯坦华人回国 隔离14天后可回家 亲历者:提前12小时就到机场, siteName: 封面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536083513406606, homepageUrl: https://baijiahao.baidu.com/u?app_id=1577667706397024, item_avatar: http://pic.rmb.bdstatic.com/7d15a50cee51b0a96e00a85cfabbb513.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596960850, eventDescription: 日本连续5天新增逾千例 安倍:极力避免再次进入紧急状态, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674535368426252580, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596960494, eventDescription: 美国新冠确诊病例接近500万例, siteName: 新华社客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674534439138090155, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552507899985619, item_avatar: http://pic.rmb.bdstatic.com/f873c8976c274e5f9bacc77a5c1b3e89.jpeg@c_1,w_385,h_385,x_6,y_11\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596953765, eventDescription: 印度第3位部长级官员确诊感染新冠肺炎 身兼多份国务要职, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674527459855975038, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596942761, eventDescription: 印度安德拉邦一新冠肺炎征用酒店起火 已致4人死亡, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674515947367591821, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596931253, eventDescription: 警惕!日本研究发现变异新冠病毒,已向全国各地扩散, siteName: 北晚新视觉网, eventUrl: http://baijiahao.baidu.com/s?id=1674503882559884129, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549941228125394, item_avatar: http://pic.rmb.bdstatic.com/b9279adf974b78d27201a0b34970c2a9.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596927420, eventDescription: 一文读懂全球疫情:全球累计确诊超1977万例 印度连续10天日增病例超5万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674500137134331393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596926700, eventDescription: 「战疫全时区」美国单日新增确诊病例超6.1万例 累计逾514万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674499173102689654, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596926340, eventDescription: 「战疫全时区」巴西累计确诊病例超301万例 死亡逾10万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674498936636405454, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596871924, eventDescription: 「战疫全时区」俄罗斯新增5212例确诊病例 累计确诊超88万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674442809541789098, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596860880, eventDescription: 「战疫全时区」印度日增超6.1万例 累计确诊超208万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674430048790588388, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596840442, eventDescription: 一文读懂全球疫情:全球累计确诊近1950万例 印度累计确诊超过200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408972047141586, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596839886, eventDescription: 「战疫全时区」巴西单日新增确诊病例超4.4万例 累计逾296万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408470860851393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596839434, eventDescription: 「战疫全时区」美国单日新增确诊病例超6万例 累计逾508万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674407841000149898, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596817404, eventDescription: 20天!印度新冠确诊病例从100万增至200万, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674384907923543549, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596801592, eventDescription: 8月7日全球疫情观察:至少22国日增确诊超千例 印度累计确诊逾200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674368083759315686, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596786000, eventDescription: 「战疫全时区」俄罗斯新增5241例确诊病例 累计确诊超87万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674351790374377698, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596783247, eventDescription: 白俄罗斯总统卢卡申科:有人故意将新冠病毒传染给我, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674348658542718182, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596769135, eventDescription: 印度成全球第三个确诊破200万的国家,21天新增100万, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674333884198078936, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596760866, eventDescription: 世卫组织:已有六种新冠疫苗进入三期临床试验阶段 其中三种来自中国, siteName: 中国日报网, eventUrl: http://baijiahao.baidu.com/s?id=1674325335422513595, homepageUrl: https://baijiahao.baidu.com/u?app_id=1567805706555546, item_avatar: http://pic.rmb.bdstatic.com/8f27cc4a0abf470446a58a0066b710f5.png\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596753951, eventDescription: 一文读懂全球疫情:全球累计确诊近1920万例 美国一州长为特朗普接机前确诊, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318314838065202, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596753846, eventDescription: 「战疫全时区」巴西单日新增确诊病例超5.3万例 累计逾291万例, siteName: 人民资讯, eventUrl: http://baijiahao.baidu.com/s?id=1674317758867433207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1669728810290752, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/867084918f0beae5baa88b1fcbc34f1f.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596753788, eventDescription: 「战疫全时区」美国单日新增确诊病例超5.9万例 累计逾502万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318066095349625, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596723779, eventDescription: 今日疫情汇总:黎巴嫩爆炸恐加剧疫情传播,朝鲜首例疑似检测“没结果”, siteName: 纵相新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674286560061876250, homepageUrl: https://baijiahao.baidu.com/u?app_id=1607773795161133, item_avatar: http://pic.rmb.bdstatic.com/4221a08a04ad0d69f15b15252297b8e2.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "eventTime: 1596708377, eventDescription: 全球新冠肺炎确诊病例超1900万例 死亡病例逾71.1万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674270432508007457, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n", + "++++++++++++++++++++++++++++++++++++++++++++++++++\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n" + ] + } + ], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import re \n", + "import json \n", + "# 定义实时国外新闻类\n", + "class outsideNews: \n", + " def __init__(self): \n", + " insidenews.eventTime = ''\n", + " self.eventDescription = '' \n", + " self.siteName = '' \n", + " self.eventUrl = '' \n", + " self.homepageUrl = ''\n", + " self.item_avatar = ''\n", + " \n", + " def __str__(self): \n", + " return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n", + "\n", + " \n", + "response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group') #发送get请求,获得目标服务器相应 \n", + "html = response.content.decode('unicode-escape')# 解码 \n", + "soup = BeautifulSoup(html)# 构建soup对象 \n", + "tag = soup.find('p')# 查找指定标签\n", + "tagstr = tag.string# 转换成字符串\n", + "results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', tagstr)# 使用正则表达式查找所有的实时新闻\n", + "\n", + "all_outsidenews = [] \n", + "for item in results:\n", + " outsidenews = outsideNews() \n", + " itemjson = json.loads(item) \n", + " outsidenews.eventTime = itemjson['eventTime']\n", + " outsidenews.eventDescription = itemjson['eventDescription'] \n", + " outsidenews.siteName = itemjson['siteName'] \n", + " outsidenews.eventUrl = itemjson['eventUrl'] \n", + " outsidenews.homepageUrl = itemjson['homepageUrl'] \n", + " outsidenews.item_avatar = itemjson['item_avatar'] \n", + " \n", + " all_outsidenews.append(outsidenews) \n", + " \n", + "for outsidenews in all_outsidenews: \n", + " print(outsidenews) \n", + " print('+++++' * 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 将获取到的国内外实时新闻数据导入数据库中" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " ## 数据库实体类" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "\n", + "class MyDB:\n", + " def __init__(self, host, user, passwd, db):\n", + " self.conn = pymysql.connect(host, user, passwd, db)\n", + " self.cursor = self.conn.cursor()\n", + " \n", + " def get_insideNews_list_tuple(self, all_insideNews):\n", + " info_tuple = []\n", + " for item in all_insideNews:\n", + " info_tuple.append(item.get_info_tuple())\n", + " return info_tuple\n", + " \n", + " # 保存国内新闻数据\n", + " def save_insideNews_datas(self, all_insideNews):\n", + " sql = 'insert into insideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n", + " res = self.get_insideNews_list_tuple(all_insideNews)\n", + " \n", + " print('+++ save_insideNews_datas, data len: %d' % len(res))\n", + " try:\n", + " self.cursor.executemany(sql, res)#批处理插入函数,res为元组类型,其中还是元组类型\n", + " self.conn.commit()\n", + " except Exception as e:\n", + " print(e)\n", + " print('+++ save_insideNews_datas is over.')\n", + " \n", + " \n", + " \n", + " def get_outsideNews_list_tuple(self, all_outsideNews):\n", + " info_tuple = []\n", + " for item in all_outsideNews:\n", + " info_tuple.append(item.get_info_tuple())\n", + " return info_tuple\n", + " \n", + " #保存国外新闻数据\n", + " def save_outsideNews_datas(self, all_outsideNews):\n", + " sql = 'insert into outsideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n", + " res = self.get_outsideNews_list_tuple(all_outsideNews)\n", + " \n", + " print('+++ save_outsideNews_datas, data len: %d' % len(res))\n", + " try:\n", + " self.cursor.executemany(sql, res)#批处理插入函数,res为元组类型,其中还是元组类型\n", + " self.conn.commit()\n", + " except Exception as e:\n", + " print(e)\n", + " print('+++ save_outsideNews_datas is over.')\n", + " \n", + " def __del__(self):\n", + " if self.conn is not None:\n", + " self.conn.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 国内新闻" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+++ save_insideNews_datas, data len: 37\n", + "+++ save_insideNews_datas is over.\n" + ] + } + ], + "source": [ + "import requests\n", + "import re\n", + "from bs4 import BeautifulSoup\n", + "import json\n", + " \n", + "\n", + "# 定义实时国内新闻类\n", + "class InsideNews: \n", + " def __init__(self): \n", + " self.nid = ''\n", + " self.thread_id = ''\n", + " self.eventTime = '' \n", + " self.eventDescription = '' \n", + " self.siteName = '' \n", + " self.eventUrl = '' \n", + " self.homepageUrl = ''\n", + " self.item_avatar = ''\n", + " \n", + " def __str__(self): \n", + " return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n", + "\n", + " def get_info_tuple(self):\n", + " return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n", + " \n", + "class DataService:\n", + " def __init__(self):\n", + " self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group'\n", + " self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n", + " \n", + " #抓取网页\n", + " def fetch_html_page(self):\n", + " res = requests.get(self.url)#发送get请求,获得目标服务器响应\n", + " res = res.content.decode('unicode-escape')#解码\n", + " return res\n", + " \n", + " #解析网页\n", + " def parse_target_page(self, html):\n", + " soup = BeautifulSoup(html)#构建soup对象\n", + " tag = soup.find('p')#查找指定标签\n", + " tagStr = tag.string#转换为字符串\n", + " \n", + " #使用正则表达式查找所有内容,结果返回为list类型(即获取国内实时新闻数据)\n", + " self.insideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n", + " \n", + " def fetch_insideNews_datas(self):\n", + " all_insideNews = []\n", + " for item in self.insideNews:\n", + " insideNews = InsideNews() \n", + " itemjson = json.loads(item) \n", + " insideNews.eventTime = itemjson['eventTime']\n", + " insideNews.eventDescription = itemjson['eventDescription'] \n", + " insideNews.siteName = itemjson['siteName'] \n", + " insideNews.eventUrl = itemjson['eventUrl'] \n", + " insideNews.homepageUrl = itemjson['homepageUrl'] \n", + " insideNews.item_avatar = itemjson['item_avatar'] \n", + " \n", + " all_insideNews.append(insideNews)\n", + " return all_insideNews\n", + " \n", + " #提取内容生成对象\n", + " def fetch_page_datas(self):\n", + " all_insideNews = self.fetch_insideNews_datas()\n", + " \n", + " # for item in all_insideNews:\n", + " # print(item)\n", + " # print(\"+++++\"*10)\n", + " \n", + " return all_insideNews\n", + " \n", + " #业务函数\n", + " def process_data(self):\n", + " html = self.fetch_html_page()\n", + " self.parse_target_page(html)\n", + " all_insideNews = self.fetch_page_datas()\n", + " \n", + " #保存国内实时播报新闻数据\n", + " self.db.save_insideNews_datas(all_insideNews)\n", + " \n", + "#创建DataService对象\n", + "ds = DataService()\n", + "ds.process_data()\n", + "\n", + "#print(len(ds.insideNews))\n", + "#print(type(ds.insideNews[0]))\n", + "#print(ds.insideNews[0])\n", + "#print()\n", + "#print(ds.outsideNews)\n", + "#print()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " ## 国外新闻" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+++ save_outsideNews_datas, data len: 40\n", + "+++ save_outsideNews_datas is over.\n" + ] + } + ], + "source": [ + "import requests\n", + "import re\n", + "from bs4 import BeautifulSoup\n", + "import json\n", + " \n", + "\n", + "# 定义实时国内新闻类\n", + "class OutsideNews: \n", + " def __init__(self): \n", + " self.nid = ''\n", + " self.thread_id = ''\n", + " self.eventTime = '' \n", + " self.eventDescription = '' \n", + " self.siteName = '' \n", + " self.eventUrl = '' \n", + " self.homepageUrl = ''\n", + " self.item_avatar = ''\n", + " \n", + " def __str__(self): \n", + " return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n", + "\n", + " def get_info_tuple(self):\n", + " return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n", + " \n", + "class DataService:\n", + " def __init__(self):\n", + " self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group'\n", + " self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n", + " \n", + " #抓取网页\n", + " def fetch_html_page(self):\n", + " res = requests.get(self.url)#发送get请求,获得目标服务器响应\n", + " res = res.content.decode('unicode-escape')#解码\n", + " return res\n", + " \n", + " #解析网页\n", + " def parse_target_page(self, html):\n", + " soup = BeautifulSoup(html)#构建soup对象\n", + " tag = soup.find('p')#查找指定标签\n", + " tagStr = tag.string#转换为字符串\n", + " \n", + " #使用正则表达式查找所有内容,结果返回为list类型(即获取国内实时新闻数据)\n", + " self.outsideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n", + " \n", + " def fetch_outsideNews_datas(self):\n", + " all_outsideNews = []\n", + " for item in self.outsideNews:\n", + " outsideNews = OutsideNews() \n", + " itemjson = json.loads(item) \n", + " outsideNews.eventTime = itemjson['eventTime']\n", + " outsideNews.eventDescription = itemjson['eventDescription'] \n", + " outsideNews.siteName = itemjson['siteName'] \n", + " outsideNews.eventUrl = itemjson['eventUrl'] \n", + " outsideNews.homepageUrl = itemjson['homepageUrl'] \n", + " outsideNews.item_avatar = itemjson['item_avatar'] \n", + " \n", + " all_outsideNews.append(outsideNews)\n", + " return all_outsideNews\n", + " \n", + " #提取内容生成对象\n", + " def fetch_page_datas(self):\n", + " all_outsideNews = self.fetch_outsideNews_datas()\n", + " \n", + " # for item in all_insideNews:\n", + " # print(item)\n", + " # print(\"+++++\"*10)\n", + " \n", + " return all_outsideNews\n", + " \n", + " #业务函数\n", + " def process_data(self):\n", + " html = self.fetch_html_page()\n", + " self.parse_target_page(html)\n", + " all_outsideNews = self.fetch_page_datas()\n", + " \n", + " #保存国内实时播报新闻数据\n", + " self.db.save_outsideNews_datas(all_outsideNews)\n", + " \n", + "#创建DataService对象\n", + "ds = DataService()\n", + "ds.process_data()\n", + "\n", + "#print(len(ds.insideNews))\n", + "#print(type(ds.insideNews[0]))\n", + "#print(ds.insideNews[0])\n", + "#print()\n", + "#print(ds.outsideNews)\n", + "#print()\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}