You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ppre8onyw/提取实时新闻数据.ipynb

670 lines
70 KiB

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 获取国内疫情实时滚动新闻播报数据"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10193162063076338121\",\"thread_id\":\"1125000033495772\",\"type\":\"news\"},\"eventDescription\":\"北京连续6日零新增\",\"eventTime\":\"1597278720\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674868131526596687\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9729390377660864175\",\"thread_id\":\"1062000033493672\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委昨日新增确诊病例19例 本土病例8例\",\"eventTime\":\"1597278262\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867651765148223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1551968238585112\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\",\"siteName\":\"环球时报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9766033389288531073\",\"thread_id\":\"1035000033486933\",\"type\":\"news\"},\"eventDescription\":\"新疆含兵团新增新冠肺炎确诊病例8例 均在乌鲁木齐市\",\"eventTime\":\"1597277648\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867007109286633\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9382499071856256847\",\"thread_id\":\"1027000033493454\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例新增境外输入2例\",\"eventTime\":\"1597276086\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674865718710262411\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9691932825381303415\",\"thread_id\":\"1001000033461176\",\"type\":\"news\"},\"eventDescription\":\"一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治\",\"eventTime\":\"1597235700\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674823034158688355\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9964760177505552343\",\"thread_id\":\"1011000033456808\",\"type\":\"news\"},\"eventDescription\":\"香港新增62例确诊病例 连续10天少于100例\",\"eventTime\":\"1597221655\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674808294135893351\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9288792493272135235\",\"thread_id\":\"1059000033444318\",\"type\":\"news\"},\"eventDescription\":\"安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性\",\"eventTime\":\"1597217665\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674804105747106594\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1570168240515616\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/0026e9a191787761348be081d428a354.jpeg\",\"siteName\":\"央视新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10446745964617569625\",\"thread_id\":\"1051000033431472\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委昨日新增确诊25例其中本土病例9例均在新疆\",\"eventTime\":\"1597193291\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674772359564935639\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10156202482208840230\",\"thread_id\":\"1113000033424506\",\"type\":\"news\"},\"eventDescription\":\"北京连续5天无新增确诊病例\",\"eventTime\":\"1597192410\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777629341250609\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/73c41abfdd401cc72e04d64b3ce0103d.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10445989328771260362\",\"thread_id\":\"1038000033429039\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入确诊病例6例广州报告1例、珠海5例\",\"eventTime\":\"1597192142\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777366221789944\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10225492811636488031\",\"thread_id\":\"1005000033424867\",\"type\":\"news\"},\"eventDescription\":\"新疆报告新增新冠肺炎确诊病例9例新增无症状感染者8例\",\"eventTime\":\"1597191705\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674776890002663976\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10687708877099983703\",\"thread_id\":\"1122000033425687\",\"type\":\"news\"},\"eventDescription\":\"上海新增境外输入4例已追踪同航班密接者106人\",\"eventTime\":\"1597190188\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674775298876647449\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9974943368943181587\",\"thread_id\":\"1088000033375829\",\"type\":\"news\"},\"eventDescription\":\"新疆昨日新增确诊13例新增无症状感染者11例均在乌鲁木齐市\",\"eventTime\":\"1597105476\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674686471966488007\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9619452370238472138\",\"thread_id\":\"1007000033365024\",\"type\":\"news\"},\"eventDescription\":\"昨天上海新增8例境外输入病例均为中国籍在阿联酋工作\",\"eventTime\":\"1597103162\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674684045559410432\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9507350360885120833\",\"thread_id\":\"1063000033345790\",\"type\":\"news\"},\"eventDescription\":\"辽宁省新增2例境外输入确诊病例分别来自美国和菲律宾\",\"eventTime\":\"1597057109\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674636151291490440\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/timg01.bdimg.com\\/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10041585112433645986\",\"thread_id\":\"1009000033336073\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例确诊病例 连续8天少于100例\",\"eventTime\":\"1597047600\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674626913670664857\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9094183316011565694\",\"thread_id\":\"1067000033325987\",\"type\":\"news\"},\"eventDescription\":\"韩国境外输入性新冠病例中检出3例病毒变异\",\"eventTime\":\"1597045994\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674624304511109399\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10678170579397374006\",\"thread_id\":\"1091000033323462\",\"type\":\"news\"},\"eventDescription\":\"新疆新增新冠确诊病例14例无症状感染者7例均在乌鲁木齐市\",\"eventTime\":\"1597019819\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596654587658867\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10594182283454069402\",\"thread_id\":\"1117000033305820\",\"type\":\"news\"},\"eventDescription\":\"北京连续三日零新增!\",\"eventTime\":\"1597019546\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596367795749792\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9838778629077913962\",\"thread_id\":\"1022000033302920\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入3例入境后即被隔离观察\",\"eventTime\":\"1597018528\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674595300038757825\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9564560403354013811\",\"thread_id\":\"1018000033303382\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例新增境外输入18例\",\"eventTime\":\"1597016945\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674593640123663447\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10189864705033416741\",\"thread_id\":\"1051000033311081\",\"type\":\"news\"},\"eventDescription\":\"重庆昨日新增无症状感染者1例为新加坡输入\",\"eventTime\":\"1597015727\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674592612999522909\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9484762286110882343\",\"thread_id\":\"1031000033318117\",\"type\":\"news\"},\"eventDescription\":\"一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情\",\"eventTime\":\"1597013216\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674590351892834223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10031387837680691121\",\"thread_id\":\"1047000033289493\",\"type\":\"news\"},\"eventDescription\":\"香港新增72例新冠确诊病例连续7日少于100例\",\"eventTime\":\"1596962023\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674536151578789920\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8870194084231278988\",\"thread_id\":\"1032000033266987\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例15例均在乌鲁木齐\",\"eventTime\":\"1596933298\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674505943705180172\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9364628799509405044\",\"thread_id\":\"1122000033252294\",\"type\":\"news\"},\"eventDescription\":\"31省区市新增确诊病例23例其中本土病例15例均在新疆\",\"eventTime\":\"1596930618\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674503120087258676\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9965173851761410585\",\"thread_id\":\"1002000033232184\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例新冠确诊病例累计超过4000例\",\"eventTime\":\"1596875762\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674445865981756049\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10269292514474689323\",\"thread_id\":\"1005000033194943\",\"type\":\"news\"},\"eventDescription\":\"截至8月7日24时新型冠状病毒肺炎疫情最新情况\",\"eventTime\":\"1596848726\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674417127284939659\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9285567187511393140\",\"thread_id\":\"1087000033199166\",\"type\":\"news\"},\"eventDescription\":\"北京8月7日无新增报告新冠肺炎确诊病例\",\"eventTime\":\"1596846935\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674415443327545029\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1636203306265037\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\",\"siteName\":\"青瞳视角\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9141704573616219966\",\"thread_id\":\"1055000033207982\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例25例、无症状感染者8例\",\"eventTime\":\"1596846320\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674414780683115272\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8385504052586009526\",\"thread_id\":\"1049000033197879\",\"type\":\"news\"},\"eventDescription\":\"辽宁连续两日无新增本土确诊病例 新增治愈出院3例\",\"eventTime\":\"1596844828\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674413542605070060\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10340470323184094852\",\"thread_id\":\"1006000033182361\",\"type\":\"news\"},\"eventDescription\":\"新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了\",\"eventTime\":\"1596812732\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674379507756801699\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9212218199886086839\",\"thread_id\":\"1073000033176131\",\"type\":\"news\"},\"eventDescription\":\"对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知\",\"eventTime\":\"1596800212\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674366411525373651\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9522441535993403492\",\"thread_id\":\"1112000033153621\",\"type\":\"news\"},\"eventDescription\":\"林郑月娥:港府将免费为全港市民进行新冠病毒检测\",\"eventTime\":\"1596785041\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674350469425444439\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8873266033734356254\",\"thread_id\":\"1040000033143344\",\"type\":\"news\"},\"eventDescription\":\"北京新增大连疫情关联病例1例新发地疫情病例清零\",\"eventTime\":\"1596760416\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324650362213371\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9716904130394627526\",\"thread_id\":\"1027000033142735\",\"type\":\"news\"},\"eventDescription\":\"31省市区新增确诊37例其中本土病例27例新疆26例 北京1例\",\"eventTime\":\"1596760201\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324373876029599\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9549964897517921046\",\"thread_id\":\"1033000033139415\",\"type\":\"news\"},\"eventDescription\":\"新疆含兵团新增确诊病例26例新增无症状感染者10例\",\"eventTime\":\"1596759965\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324177292404207\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"eventTime: , eventDescription: 北京连续6日零新增, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674868131526596687, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 国家卫健委昨日新增确诊病例19例 本土病例8例, siteName: 环球时报, eventUrl: http://baijiahao.baidu.com/s?id=1674867651765148223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1551968238585112, item_avatar: http://pic.rmb.bdstatic.com/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆含兵团新增新冠肺炎确诊病例8例 均在乌鲁木齐市, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674867007109286633, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例新增境外输入2例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674865718710262411, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674823034158688355, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增62例确诊病例 连续10天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674808294135893351, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性, siteName: 央视新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674804105747106594, homepageUrl: http://baijiahao.baidu.com/u?app_id=1570168240515616, item_avatar: http://pic.rmb.bdstatic.com/0026e9a191787761348be081d428a354.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 国家卫健委昨日新增确诊25例其中本土病例9例均在新疆, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674772359564935639, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京连续5天无新增确诊病例, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674777629341250609, homepageUrl: http://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/73c41abfdd401cc72e04d64b3ce0103d.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 广东新增境外输入确诊病例6例广州报告1例、珠海5例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674777366221789944, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆报告新增新冠肺炎确诊病例9例新增无症状感染者8例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674776890002663976, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海新增境外输入4例已追踪同航班密接者106人, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674775298876647449, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆昨日新增确诊13例新增无症状感染者11例均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674686471966488007, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 昨天上海新增8例境外输入病例均为中国籍在阿联酋工作, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674684045559410432, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 辽宁省新增2例境外输入确诊病例分别来自美国和菲律宾, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674636151291490440, homepageUrl: http://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://timg01.bdimg.com/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增69例确诊病例 连续8天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674626913670664857, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 韩国境外输入性新冠病例中检出3例病毒变异, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674624304511109399, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增新冠确诊病例14例无症状感染者7例均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674596654587658867, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京连续三日零新增!, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674596367795749792, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 广东新增境外输入3例入境后即被隔离观察, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674595300038757825, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例新增境外输入18例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674593640123663447, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 重庆昨日新增无症状感染者1例为新加坡输入, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674592612999522909, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增72例新冠确诊病例连续7日少于100例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536151578789920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增确诊病例15例均在乌鲁木齐, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674505943705180172, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 31省区市新增确诊病例23例其中本土病例15例均在新疆, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674503120087258676, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增69例新冠确诊病例累计超过4000例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674445865981756049, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 截至8月7日24时新型冠状病毒肺炎疫情最新情况, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674417127284939659, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京8月7日无新增报告新冠肺炎确诊病例, siteName: 青瞳视角, eventUrl: http://baijiahao.baidu.com/s?id=1674415443327545029, homepageUrl: https://baijiahao.baidu.com/u?app_id=1636203306265037, item_avatar: http://pic.rmb.bdstatic.com/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增确诊病例25例、无症状感染者8例, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674414780683115272, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 辽宁连续两日无新增本土确诊病例 新增治愈出院3例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674413542605070060, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674379507756801699, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674366411525373651, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 林郑月娥:港府将免费为全港市民进行新冠病毒检测, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674350469425444439, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京新增大连疫情关联病例1例新发地疫情病例清零, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674324650362213371, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 31省市区新增确诊37例其中本土病例27例新疆26例 北京1例, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674324373876029599, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596759965, eventDescription: 新疆含兵团新增确诊病例26例新增无症状感染者10例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674324177292404207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n"
]
}
],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re \n",
"import json \n",
"# 定义实时国内新闻类\n",
"class insideNews: \n",
" def __init__(self): \n",
" insidenews.eventTime = ''\n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" \n",
"response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group') #发送get请求获得目标服务器相应 \n",
"html = response.content.decode('unicode-escape')# 解码 \n",
"soup = BeautifulSoup(html)# 构建soup对象 \n",
"tag = soup.find('p')# 查找指定标签\n",
"tagstr = tag.string# 转换成字符串\n",
"results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', tagstr)# 使用正则表达式查找所有的实时新闻\n",
"\n",
"all_insidenews = [] \n",
"for item in results:\n",
" insidenews = insideNews() \n",
" print(item)\n",
" itemjson = json.loads(item) \n",
" insidenews.eventTime = itemjson['eventTime']\n",
" insidenews.eventDescription = itemjson['eventDescription'] \n",
" insidenews.siteName = itemjson['siteName'] \n",
" insidenews.eventUrl = itemjson['eventUrl'] \n",
" insidenews.homepageUrl = itemjson['homepageUrl'] \n",
" insidenews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_insidenews.append(insidenews) \n",
" \n",
"for insidenews in all_insidenews: \n",
" print(insidenews) \n",
" print('+++++' * 10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 获取国外疫情实时滚动新闻播报数据"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"eventTime: 1597275291, eventDescription: 日增5.5万巴西新冠肺炎确诊病例超316万例, siteName: 人民日报, eventUrl: http://baijiahao.baidu.com/s?id=1674864544431254391, homepageUrl: https://baijiahao.baidu.com/u?app_id=1593743208952652, item_avatar: http://pic.rmb.bdstatic.com/53864a5e77b735000396c815760f7b4c.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597273097, eventDescription: 一文读懂全球疫情全球累计确诊超2076万例 美国现另一场公共卫生危机, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674862818784287730, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597272142, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.8万例 累计超535万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674861714204253861, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597217570, eventDescription: 「战疫全时区」俄罗斯新增5102例确诊病例 累计超90万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674804011129533783, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597187332, eventDescription: 美国确诊超513万死亡超16.4万例超38万儿童感染, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674772303894901158, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597186109, eventDescription: 一文读懂全球疫情全球累计确诊超2048万例 俄罗斯注册首款新冠疫苗, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674771802796395216, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597108712, eventDescription: 全球2000万+!确诊数前三的国家,疫情形势怎么样了?, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674689865386982063, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597099802, eventDescription: 一文读懂全球疫情全球累计确诊超2021万例 巴西90万人在疫情期间成功戒烟, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674681138323257988, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597098897, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.1万例 累计超524万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674680199047536719, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597098540, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.2万例 累计超305万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674679503309357561, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597044840, eventDescription: 「战疫全时区」俄罗斯新增5118例确诊病例 累计超89万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674623132178961292, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597041465, eventDescription: 返校前夕全美近10万学生两周内感染新冠肺炎, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674620237736618467, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597013216, eventDescription: 一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597012823, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4万例 累计超519万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589542624222920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597012711, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.3万例 累计超303万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589406966244222, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596961970, eventDescription: 首批哈萨克斯坦华人回国 隔离14天后可回家 亲历者提前12小时就到机场, siteName: 封面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536083513406606, homepageUrl: https://baijiahao.baidu.com/u?app_id=1577667706397024, item_avatar: http://pic.rmb.bdstatic.com/7d15a50cee51b0a96e00a85cfabbb513.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596960850, eventDescription: 日本连续5天新增逾千例 安倍:极力避免再次进入紧急状态, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674535368426252580, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596960494, eventDescription: 美国新冠确诊病例接近500万例, siteName: 新华社客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674534439138090155, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552507899985619, item_avatar: http://pic.rmb.bdstatic.com/f873c8976c274e5f9bacc77a5c1b3e89.jpeg@c_1,w_385,h_385,x_6,y_11\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596953765, eventDescription: 印度第3位部长级官员确诊感染新冠肺炎 身兼多份国务要职, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674527459855975038, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596942761, eventDescription: 印度安德拉邦一新冠肺炎征用酒店起火 已致4人死亡, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674515947367591821, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596931253, eventDescription: 警惕!日本研究发现变异新冠病毒,已向全国各地扩散, siteName: 北晚新视觉网, eventUrl: http://baijiahao.baidu.com/s?id=1674503882559884129, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549941228125394, item_avatar: http://pic.rmb.bdstatic.com/b9279adf974b78d27201a0b34970c2a9.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596927420, eventDescription: 一文读懂全球疫情全球累计确诊超1977万例 印度连续10天日增病例超5万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674500137134331393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596926700, eventDescription: 「战疫全时区」美国单日新增确诊病例超6.1万例 累计逾514万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674499173102689654, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596926340, eventDescription: 「战疫全时区」巴西累计确诊病例超301万例 死亡逾10万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674498936636405454, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596871924, eventDescription: 「战疫全时区」俄罗斯新增5212例确诊病例 累计确诊超88万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674442809541789098, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596860880, eventDescription: 「战疫全时区」印度日增超6.1万例 累计确诊超208万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674430048790588388, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596840442, eventDescription: 一文读懂全球疫情全球累计确诊近1950万例 印度累计确诊超过200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408972047141586, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596839886, eventDescription: 「战疫全时区」巴西单日新增确诊病例超4.4万例 累计逾296万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408470860851393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596839434, eventDescription: 「战疫全时区」美国单日新增确诊病例超6万例 累计逾508万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674407841000149898, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596817404, eventDescription: 20天印度新冠确诊病例从100万增至200万, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674384907923543549, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596801592, eventDescription: 8月7日全球疫情观察至少22国日增确诊超千例 印度累计确诊逾200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674368083759315686, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596786000, eventDescription: 「战疫全时区」俄罗斯新增5241例确诊病例 累计确诊超87万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674351790374377698, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596783247, eventDescription: 白俄罗斯总统卢卡申科:有人故意将新冠病毒传染给我, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674348658542718182, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596769135, eventDescription: 印度成全球第三个确诊破200万的国家21天新增100万, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674333884198078936, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596760866, eventDescription: 世卫组织:已有六种新冠疫苗进入三期临床试验阶段 其中三种来自中国, siteName: 中国日报网, eventUrl: http://baijiahao.baidu.com/s?id=1674325335422513595, homepageUrl: https://baijiahao.baidu.com/u?app_id=1567805706555546, item_avatar: http://pic.rmb.bdstatic.com/8f27cc4a0abf470446a58a0066b710f5.png\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753951, eventDescription: 一文读懂全球疫情全球累计确诊近1920万例 美国一州长为特朗普接机前确诊, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318314838065202, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753846, eventDescription: 「战疫全时区」巴西单日新增确诊病例超5.3万例 累计逾291万例, siteName: 人民资讯, eventUrl: http://baijiahao.baidu.com/s?id=1674317758867433207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1669728810290752, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/867084918f0beae5baa88b1fcbc34f1f.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753788, eventDescription: 「战疫全时区」美国单日新增确诊病例超5.9万例 累计逾502万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318066095349625, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596723779, eventDescription: 今日疫情汇总:黎巴嫩爆炸恐加剧疫情传播,朝鲜首例疑似检测“没结果”, siteName: 纵相新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674286560061876250, homepageUrl: https://baijiahao.baidu.com/u?app_id=1607773795161133, item_avatar: http://pic.rmb.bdstatic.com/4221a08a04ad0d69f15b15252297b8e2.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596708377, eventDescription: 全球新冠肺炎确诊病例超1900万例 死亡病例逾71.1万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674270432508007457, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n"
]
}
],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re \n",
"import json \n",
"# 定义实时国外新闻类\n",
"class outsideNews: \n",
" def __init__(self): \n",
" insidenews.eventTime = ''\n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" \n",
"response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group') #发送get请求获得目标服务器相应 \n",
"html = response.content.decode('unicode-escape')# 解码 \n",
"soup = BeautifulSoup(html)# 构建soup对象 \n",
"tag = soup.find('p')# 查找指定标签\n",
"tagstr = tag.string# 转换成字符串\n",
"results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', tagstr)# 使用正则表达式查找所有的实时新闻\n",
"\n",
"all_outsidenews = [] \n",
"for item in results:\n",
" outsidenews = outsideNews() \n",
" itemjson = json.loads(item) \n",
" outsidenews.eventTime = itemjson['eventTime']\n",
" outsidenews.eventDescription = itemjson['eventDescription'] \n",
" outsidenews.siteName = itemjson['siteName'] \n",
" outsidenews.eventUrl = itemjson['eventUrl'] \n",
" outsidenews.homepageUrl = itemjson['homepageUrl'] \n",
" outsidenews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_outsidenews.append(outsidenews) \n",
" \n",
"for outsidenews in all_outsidenews: \n",
" print(outsidenews) \n",
" print('+++++' * 10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 将获取到的国内外实时新闻数据导入数据库中"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" ## 数据库实体类"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import pymysql\n",
"\n",
"class MyDB:\n",
" def __init__(self, host, user, passwd, db):\n",
" self.conn = pymysql.connect(host, user, passwd, db)\n",
" self.cursor = self.conn.cursor()\n",
" \n",
" def get_insideNews_list_tuple(self, all_insideNews):\n",
" info_tuple = []\n",
" for item in all_insideNews:\n",
" info_tuple.append(item.get_info_tuple())\n",
" return info_tuple\n",
" \n",
" # 保存国内新闻数据\n",
" def save_insideNews_datas(self, all_insideNews):\n",
" sql = 'insert into insideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n",
" res = self.get_insideNews_list_tuple(all_insideNews)\n",
" \n",
" print('+++ save_insideNews_datas, data len: %d' % len(res))\n",
" try:\n",
" self.cursor.executemany(sql, res)#批处理插入函数res为元组类型其中还是元组类型\n",
" self.conn.commit()\n",
" except Exception as e:\n",
" print(e)\n",
" print('+++ save_insideNews_datas is over.')\n",
" \n",
" \n",
" \n",
" def get_outsideNews_list_tuple(self, all_outsideNews):\n",
" info_tuple = []\n",
" for item in all_outsideNews:\n",
" info_tuple.append(item.get_info_tuple())\n",
" return info_tuple\n",
" \n",
" #保存国外新闻数据\n",
" def save_outsideNews_datas(self, all_outsideNews):\n",
" sql = 'insert into outsideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n",
" res = self.get_outsideNews_list_tuple(all_outsideNews)\n",
" \n",
" print('+++ save_outsideNews_datas, data len: %d' % len(res))\n",
" try:\n",
" self.cursor.executemany(sql, res)#批处理插入函数res为元组类型其中还是元组类型\n",
" self.conn.commit()\n",
" except Exception as e:\n",
" print(e)\n",
" print('+++ save_outsideNews_datas is over.')\n",
" \n",
" def __del__(self):\n",
" if self.conn is not None:\n",
" self.conn.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 国内新闻"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+++ save_insideNews_datas, data len: 37\n",
"+++ save_insideNews_datas is over.\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
" \n",
"\n",
"# 定义实时国内新闻类\n",
"class InsideNews: \n",
" def __init__(self): \n",
" self.nid = ''\n",
" self.thread_id = ''\n",
" self.eventTime = '' \n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" def get_info_tuple(self):\n",
" return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n",
" \n",
"class DataService:\n",
" def __init__(self):\n",
" self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group'\n",
" self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n",
" \n",
" #抓取网页\n",
" def fetch_html_page(self):\n",
" res = requests.get(self.url)#发送get请求获得目标服务器响应\n",
" res = res.content.decode('unicode-escape')#解码\n",
" return res\n",
" \n",
" #解析网页\n",
" def parse_target_page(self, html):\n",
" soup = BeautifulSoup(html)#构建soup对象\n",
" tag = soup.find('p')#查找指定标签\n",
" tagStr = tag.string#转换为字符串\n",
" \n",
" #使用正则表达式查找所有内容结果返回为list类型(即获取国内实时新闻数据)\n",
" self.insideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n",
" \n",
" def fetch_insideNews_datas(self):\n",
" all_insideNews = []\n",
" for item in self.insideNews:\n",
" insideNews = InsideNews() \n",
" itemjson = json.loads(item) \n",
" insideNews.eventTime = itemjson['eventTime']\n",
" insideNews.eventDescription = itemjson['eventDescription'] \n",
" insideNews.siteName = itemjson['siteName'] \n",
" insideNews.eventUrl = itemjson['eventUrl'] \n",
" insideNews.homepageUrl = itemjson['homepageUrl'] \n",
" insideNews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_insideNews.append(insideNews)\n",
" return all_insideNews\n",
" \n",
" #提取内容生成对象\n",
" def fetch_page_datas(self):\n",
" all_insideNews = self.fetch_insideNews_datas()\n",
" \n",
" # for item in all_insideNews:\n",
" # print(item)\n",
" # print(\"+++++\"*10)\n",
" \n",
" return all_insideNews\n",
" \n",
" #业务函数\n",
" def process_data(self):\n",
" html = self.fetch_html_page()\n",
" self.parse_target_page(html)\n",
" all_insideNews = self.fetch_page_datas()\n",
" \n",
" #保存国内实时播报新闻数据\n",
" self.db.save_insideNews_datas(all_insideNews)\n",
" \n",
"#创建DataService对象\n",
"ds = DataService()\n",
"ds.process_data()\n",
"\n",
"#print(len(ds.insideNews))\n",
"#print(type(ds.insideNews[0]))\n",
"#print(ds.insideNews[0])\n",
"#print()\n",
"#print(ds.outsideNews)\n",
"#print()\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" ## 国外新闻"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+++ save_outsideNews_datas, data len: 40\n",
"+++ save_outsideNews_datas is over.\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
" \n",
"\n",
"# 定义实时国内新闻类\n",
"class OutsideNews: \n",
" def __init__(self): \n",
" self.nid = ''\n",
" self.thread_id = ''\n",
" self.eventTime = '' \n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" def get_info_tuple(self):\n",
" return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n",
" \n",
"class DataService:\n",
" def __init__(self):\n",
" self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group'\n",
" self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n",
" \n",
" #抓取网页\n",
" def fetch_html_page(self):\n",
" res = requests.get(self.url)#发送get请求获得目标服务器响应\n",
" res = res.content.decode('unicode-escape')#解码\n",
" return res\n",
" \n",
" #解析网页\n",
" def parse_target_page(self, html):\n",
" soup = BeautifulSoup(html)#构建soup对象\n",
" tag = soup.find('p')#查找指定标签\n",
" tagStr = tag.string#转换为字符串\n",
" \n",
" #使用正则表达式查找所有内容结果返回为list类型(即获取国内实时新闻数据)\n",
" self.outsideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n",
" \n",
" def fetch_outsideNews_datas(self):\n",
" all_outsideNews = []\n",
" for item in self.outsideNews:\n",
" outsideNews = OutsideNews() \n",
" itemjson = json.loads(item) \n",
" outsideNews.eventTime = itemjson['eventTime']\n",
" outsideNews.eventDescription = itemjson['eventDescription'] \n",
" outsideNews.siteName = itemjson['siteName'] \n",
" outsideNews.eventUrl = itemjson['eventUrl'] \n",
" outsideNews.homepageUrl = itemjson['homepageUrl'] \n",
" outsideNews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_outsideNews.append(outsideNews)\n",
" return all_outsideNews\n",
" \n",
" #提取内容生成对象\n",
" def fetch_page_datas(self):\n",
" all_outsideNews = self.fetch_outsideNews_datas()\n",
" \n",
" # for item in all_insideNews:\n",
" # print(item)\n",
" # print(\"+++++\"*10)\n",
" \n",
" return all_outsideNews\n",
" \n",
" #业务函数\n",
" def process_data(self):\n",
" html = self.fetch_html_page()\n",
" self.parse_target_page(html)\n",
" all_outsideNews = self.fetch_page_datas()\n",
" \n",
" #保存国内实时播报新闻数据\n",
" self.db.save_outsideNews_datas(all_outsideNews)\n",
" \n",
"#创建DataService对象\n",
"ds = DataService()\n",
"ds.process_data()\n",
"\n",
"#print(len(ds.insideNews))\n",
"#print(type(ds.insideNews[0]))\n",
"#print(ds.insideNews[0])\n",
"#print()\n",
"#print(ds.outsideNews)\n",
"#print()\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}