You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ppre8onyw/提取实时新闻数据.ipynb

670 lines
70 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 获取国内疫情实时滚动新闻播报数据"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10193162063076338121\",\"thread_id\":\"1125000033495772\",\"type\":\"news\"},\"eventDescription\":\"北京连续6日零新增\",\"eventTime\":\"1597278720\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674868131526596687\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9729390377660864175\",\"thread_id\":\"1062000033493672\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委昨日新增确诊病例19例 本土病例8例\",\"eventTime\":\"1597278262\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867651765148223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1551968238585112\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\",\"siteName\":\"环球时报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9766033389288531073\",\"thread_id\":\"1035000033486933\",\"type\":\"news\"},\"eventDescription\":\"新疆含兵团新增新冠肺炎确诊病例8例 均在乌鲁木齐市\",\"eventTime\":\"1597277648\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674867007109286633\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9382499071856256847\",\"thread_id\":\"1027000033493454\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例新增境外输入2例\",\"eventTime\":\"1597276086\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674865718710262411\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9691932825381303415\",\"thread_id\":\"1001000033461176\",\"type\":\"news\"},\"eventDescription\":\"一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治\",\"eventTime\":\"1597235700\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674823034158688355\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9964760177505552343\",\"thread_id\":\"1011000033456808\",\"type\":\"news\"},\"eventDescription\":\"香港新增62例确诊病例 连续10天少于100例\",\"eventTime\":\"1597221655\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674808294135893351\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9288792493272135235\",\"thread_id\":\"1059000033444318\",\"type\":\"news\"},\"eventDescription\":\"安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性\",\"eventTime\":\"1597217665\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674804105747106594\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1570168240515616\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/0026e9a191787761348be081d428a354.jpeg\",\"siteName\":\"央视新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10446745964617569625\",\"thread_id\":\"1051000033431472\",\"type\":\"news\"},\"eventDescription\":\"国家卫健委昨日新增确诊25例其中本土病例9例均在新疆\",\"eventTime\":\"1597193291\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674772359564935639\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10156202482208840230\",\"thread_id\":\"1113000033424506\",\"type\":\"news\"},\"eventDescription\":\"北京连续5天无新增确诊病例\",\"eventTime\":\"1597192410\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777629341250609\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/73c41abfdd401cc72e04d64b3ce0103d.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10445989328771260362\",\"thread_id\":\"1038000033429039\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入确诊病例6例广州报告1例、珠海5例\",\"eventTime\":\"1597192142\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674777366221789944\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10225492811636488031\",\"thread_id\":\"1005000033424867\",\"type\":\"news\"},\"eventDescription\":\"新疆报告新增新冠肺炎确诊病例9例新增无症状感染者8例\",\"eventTime\":\"1597191705\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674776890002663976\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10687708877099983703\",\"thread_id\":\"1122000033425687\",\"type\":\"news\"},\"eventDescription\":\"上海新增境外输入4例已追踪同航班密接者106人\",\"eventTime\":\"1597190188\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674775298876647449\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9974943368943181587\",\"thread_id\":\"1088000033375829\",\"type\":\"news\"},\"eventDescription\":\"新疆昨日新增确诊13例新增无症状感染者11例均在乌鲁木齐市\",\"eventTime\":\"1597105476\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674686471966488007\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9619452370238472138\",\"thread_id\":\"1007000033365024\",\"type\":\"news\"},\"eventDescription\":\"昨天上海新增8例境外输入病例均为中国籍在阿联酋工作\",\"eventTime\":\"1597103162\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674684045559410432\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9507350360885120833\",\"thread_id\":\"1063000033345790\",\"type\":\"news\"},\"eventDescription\":\"辽宁省新增2例境外输入确诊病例分别来自美国和菲律宾\",\"eventTime\":\"1597057109\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674636151291490440\",\"homepageUrl\":\"http:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/timg01.bdimg.com\\/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10041585112433645986\",\"thread_id\":\"1009000033336073\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例确诊病例 连续8天少于100例\",\"eventTime\":\"1597047600\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674626913670664857\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9094183316011565694\",\"thread_id\":\"1067000033325987\",\"type\":\"news\"},\"eventDescription\":\"韩国境外输入性新冠病例中检出3例病毒变异\",\"eventTime\":\"1597045994\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674624304511109399\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10678170579397374006\",\"thread_id\":\"1091000033323462\",\"type\":\"news\"},\"eventDescription\":\"新疆新增新冠确诊病例14例无症状感染者7例均在乌鲁木齐市\",\"eventTime\":\"1597019819\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596654587658867\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10594182283454069402\",\"thread_id\":\"1117000033305820\",\"type\":\"news\"},\"eventDescription\":\"北京连续三日零新增!\",\"eventTime\":\"1597019546\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674596367795749792\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9838778629077913962\",\"thread_id\":\"1022000033302920\",\"type\":\"news\"},\"eventDescription\":\"广东新增境外输入3例入境后即被隔离观察\",\"eventTime\":\"1597018528\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674595300038757825\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9564560403354013811\",\"thread_id\":\"1018000033303382\",\"type\":\"news\"},\"eventDescription\":\"上海昨日无新增本地新冠肺炎确诊病例新增境外输入18例\",\"eventTime\":\"1597016945\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674593640123663447\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10189864705033416741\",\"thread_id\":\"1051000033311081\",\"type\":\"news\"},\"eventDescription\":\"重庆昨日新增无症状感染者1例为新加坡输入\",\"eventTime\":\"1597015727\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674592612999522909\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9484762286110882343\",\"thread_id\":\"1031000033318117\",\"type\":\"news\"},\"eventDescription\":\"一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情\",\"eventTime\":\"1597013216\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674590351892834223\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10031387837680691121\",\"thread_id\":\"1047000033289493\",\"type\":\"news\"},\"eventDescription\":\"香港新增72例新冠确诊病例连续7日少于100例\",\"eventTime\":\"1596962023\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674536151578789920\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8870194084231278988\",\"thread_id\":\"1032000033266987\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例15例均在乌鲁木齐\",\"eventTime\":\"1596933298\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674505943705180172\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9364628799509405044\",\"thread_id\":\"1122000033252294\",\"type\":\"news\"},\"eventDescription\":\"31省区市新增确诊病例23例其中本土病例15例均在新疆\",\"eventTime\":\"1596930618\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674503120087258676\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9965173851761410585\",\"thread_id\":\"1002000033232184\",\"type\":\"news\"},\"eventDescription\":\"香港新增69例新冠确诊病例累计超过4000例\",\"eventTime\":\"1596875762\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674445865981756049\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1565176801499628\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/8963c44fe9bb2de078a778f71a32f6eb.jpeg\",\"siteName\":\"界面新闻\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10269292514474689323\",\"thread_id\":\"1005000033194943\",\"type\":\"news\"},\"eventDescription\":\"截至8月7日24时新型冠状病毒肺炎疫情最新情况\",\"eventTime\":\"1596848726\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674417127284939659\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9285567187511393140\",\"thread_id\":\"1087000033199166\",\"type\":\"news\"},\"eventDescription\":\"北京8月7日无新增报告新冠肺炎确诊病例\",\"eventTime\":\"1596846935\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674415443327545029\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1636203306265037\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\",\"siteName\":\"青瞳视角\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9141704573616219966\",\"thread_id\":\"1055000033207982\",\"type\":\"news\"},\"eventDescription\":\"新疆新增确诊病例25例、无症状感染者8例\",\"eventTime\":\"1596846320\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674414780683115272\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8385504052586009526\",\"thread_id\":\"1049000033197879\",\"type\":\"news\"},\"eventDescription\":\"辽宁连续两日无新增本土确诊病例 新增治愈出院3例\",\"eventTime\":\"1596844828\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674413542605070060\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1549608413453462\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\",\"siteName\":\"环球网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"10340470323184094852\",\"thread_id\":\"1006000033182361\",\"type\":\"news\"},\"eventDescription\":\"新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了\",\"eventTime\":\"1596812732\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674379507756801699\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9212218199886086839\",\"thread_id\":\"1073000033176131\",\"type\":\"news\"},\"eventDescription\":\"对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知\",\"eventTime\":\"1596800212\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674366411525373651\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1566453612428800\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/9da74a517eb1befeba93a5f3167cc74b.jpeg\",\"siteName\":\"新京报\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9522441535993403492\",\"thread_id\":\"1112000033153621\",\"type\":\"news\"},\"eventDescription\":\"林郑月娥:港府将免费为全港市民进行新冠病毒检测\",\"eventTime\":\"1596785041\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674350469425444439\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1552864910655429\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/d2f919e031c47d671a5748b5aeafe096.jpeg\",\"siteName\":\"人民日报海外网\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"8873266033734356254\",\"thread_id\":\"1040000033143344\",\"type\":\"news\"},\"eventDescription\":\"北京新增大连疫情关联病例1例新发地疫情病例清零\",\"eventTime\":\"1596760416\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324650362213371\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1601149438053974\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/814a110b34d52c106f9ba99c5d415478.jpeg\",\"siteName\":\"北京日报客户端\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9716904130394627526\",\"thread_id\":\"1027000033142735\",\"type\":\"news\"},\"eventDescription\":\"31省市区新增确诊37例其中本土病例27例新疆26例 北京1例\",\"eventTime\":\"1596760201\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324373876029599\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1589908044211516\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/ec0438088a2c4589fb541fbc12b448f0.jpeg\",\"siteName\":\"健康中国\"}\n",
"{\"bjh_na\":{\"easyBrowse\":\"1\",\"easyBrowseConfirm\":\"1\",\"nid\":\"9549964897517921046\",\"thread_id\":\"1033000033139415\",\"type\":\"news\"},\"eventDescription\":\"新疆含兵团新增确诊病例26例新增无症状感染者10例\",\"eventTime\":\"1596759965\",\"eventUrl\":\"http:\\/\\/baijiahao.baidu.com\\/s?id=1674324177292404207\",\"homepageUrl\":\"https:\\/\\/baijiahao.baidu.com\\/u?app_id=1561825967470087\",\"item_avatar\":\"http:\\/\\/pic.rmb.bdstatic.com\\/bjh\\/user\\/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\",\"siteName\":\"红星新闻\"}\n",
"eventTime: , eventDescription: 北京连续6日零新增, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674868131526596687, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 国家卫健委昨日新增确诊病例19例 本土病例8例, siteName: 环球时报, eventUrl: http://baijiahao.baidu.com/s?id=1674867651765148223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1551968238585112, item_avatar: http://pic.rmb.bdstatic.com/07feda26036edb976980ce1e803de532.jpeg@c_1,w_640,h_640,x_0,y_0\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆含兵团新增新冠肺炎确诊病例8例 均在乌鲁木齐市, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674867007109286633, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例新增境外输入2例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674865718710262411, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 一境外输入确诊患者治愈数月后复阳 目前在上海隔离诊治, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674823034158688355, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增62例确诊病例 连续10天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674808294135893351, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 安徽芜湖一餐饮店进口冻虾新冠病毒检出疑似阳性, siteName: 央视新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674804105747106594, homepageUrl: http://baijiahao.baidu.com/u?app_id=1570168240515616, item_avatar: http://pic.rmb.bdstatic.com/0026e9a191787761348be081d428a354.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 国家卫健委昨日新增确诊25例其中本土病例9例均在新疆, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674772359564935639, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京连续5天无新增确诊病例, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674777629341250609, homepageUrl: http://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/73c41abfdd401cc72e04d64b3ce0103d.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 广东新增境外输入确诊病例6例广州报告1例、珠海5例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674777366221789944, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆报告新增新冠肺炎确诊病例9例新增无症状感染者8例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674776890002663976, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海新增境外输入4例已追踪同航班密接者106人, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674775298876647449, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆昨日新增确诊13例新增无症状感染者11例均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674686471966488007, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 昨天上海新增8例境外输入病例均为中国籍在阿联酋工作, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674684045559410432, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 辽宁省新增2例境外输入确诊病例分别来自美国和菲律宾, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674636151291490440, homepageUrl: http://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://timg01.bdimg.com/timg?pacompress=&imgtype=0&sec=1439619614&di=3fb711aa6bf5a0beda833cb57989fa42&quality=90&size=b870_10000&src=http%3A%2F%2Fpic.rmb.bdstatic.com%2F2f338c67bc166c145cd457c01e102de5.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增69例确诊病例 连续8天少于100例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674626913670664857, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 韩国境外输入性新冠病例中检出3例病毒变异, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674624304511109399, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增新冠确诊病例14例无症状感染者7例均在乌鲁木齐市, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674596654587658867, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京连续三日零新增!, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674596367795749792, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 广东新增境外输入3例入境后即被隔离观察, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674595300038757825, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 上海昨日无新增本地新冠肺炎确诊病例新增境外输入18例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674593640123663447, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 重庆昨日新增无症状感染者1例为新加坡输入, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674592612999522909, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增72例新冠确诊病例连续7日少于100例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536151578789920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增确诊病例15例均在乌鲁木齐, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674505943705180172, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 31省区市新增确诊病例23例其中本土病例15例均在新疆, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674503120087258676, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 香港新增69例新冠确诊病例累计超过4000例, siteName: 界面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674445865981756049, homepageUrl: https://baijiahao.baidu.com/u?app_id=1565176801499628, item_avatar: http://pic.rmb.bdstatic.com/8963c44fe9bb2de078a778f71a32f6eb.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 截至8月7日24时新型冠状病毒肺炎疫情最新情况, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674417127284939659, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京8月7日无新增报告新冠肺炎确诊病例, siteName: 青瞳视角, eventUrl: http://baijiahao.baidu.com/s?id=1674415443327545029, homepageUrl: https://baijiahao.baidu.com/u?app_id=1636203306265037, item_avatar: http://pic.rmb.bdstatic.com/10c0099fe5c2ef55fbd758a03a6ecafa.jpeg@c_1,w_300,h_300,x_0,y_0\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆新增确诊病例25例、无症状感染者8例, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674414780683115272, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 辽宁连续两日无新增本土确诊病例 新增治愈出院3例, siteName: 环球网, eventUrl: http://baijiahao.baidu.com/s?id=1674413542605070060, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549608413453462, item_avatar: http://pic.rmb.bdstatic.com/89e9e167d806065171363ecbcb708a41.png@c_1,w_812,h_812,x_63,y_39\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 新疆目前疫情形势如何?本轮疫情有何特点?最新解读来了, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674379507756801699, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 对话地坛医院专家陈志海:半年抗疫 “新冠”刷新认知, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674366411525373651, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 林郑月娥:港府将免费为全港市民进行新冠病毒检测, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674350469425444439, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 北京新增大连疫情关联病例1例新发地疫情病例清零, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674324650362213371, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: , eventDescription: 31省市区新增确诊37例其中本土病例27例新疆26例 北京1例, siteName: 健康中国, eventUrl: http://baijiahao.baidu.com/s?id=1674324373876029599, homepageUrl: https://baijiahao.baidu.com/u?app_id=1589908044211516, item_avatar: http://pic.rmb.bdstatic.com/ec0438088a2c4589fb541fbc12b448f0.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596759965, eventDescription: 新疆含兵团新增确诊病例26例新增无症状感染者10例, siteName: 红星新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674324177292404207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1561825967470087, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/b5b87556f7efcdf3c67fb0fd355dcaac.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n"
]
}
],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re \n",
"import json \n",
"# 定义实时国内新闻类\n",
"class insideNews: \n",
" def __init__(self): \n",
" insidenews.eventTime = ''\n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" \n",
"# Download the JSONP feed. The timeout stops a stalled connection from hanging\n",
"# the kernel, and raise_for_status() reports HTTP errors instead of parsing an\n",
"# error page.\n",
"response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group', timeout=10)\n",
"response.raise_for_status()\n",
"\n",
"# Extract the JSON text of every news item straight from the response body.\n",
"# The text stays escaped on purpose: json.loads() below decodes the JSON escape\n",
"# sequences itself, so no separate 'unicode-escape' decoding pass or HTML\n",
"# parsing step is needed.\n",
"results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', response.text)\n",
"\n",
"# Build one insideNews object per extracted item\n",
"all_insidenews = []\n",
"for item in results:\n",
"    insidenews = insideNews()\n",
"    itemjson = json.loads(item)\n",
"    insidenews.eventTime = itemjson['eventTime']\n",
"    insidenews.eventDescription = itemjson['eventDescription']\n",
"    insidenews.siteName = itemjson['siteName']\n",
"    insidenews.eventUrl = itemjson['eventUrl']\n",
"    insidenews.homepageUrl = itemjson['homepageUrl']\n",
"    insidenews.item_avatar = itemjson['item_avatar']\n",
"    all_insidenews.append(insidenews)\n",
"\n",
"# Print every record followed by a separator line\n",
"for insidenews in all_insidenews:\n",
"    print(insidenews)\n",
"    print('+++++' * 10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 获取国外疫情实时滚动新闻播报数据"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"eventTime: 1597275291, eventDescription: 日增5.5万巴西新冠肺炎确诊病例超316万例, siteName: 人民日报, eventUrl: http://baijiahao.baidu.com/s?id=1674864544431254391, homepageUrl: https://baijiahao.baidu.com/u?app_id=1593743208952652, item_avatar: http://pic.rmb.bdstatic.com/53864a5e77b735000396c815760f7b4c.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597273097, eventDescription: 一文读懂全球疫情全球累计确诊超2076万例 美国现另一场公共卫生危机, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674862818784287730, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597272142, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.8万例 累计超535万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674861714204253861, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597217570, eventDescription: 「战疫全时区」俄罗斯新增5102例确诊病例 累计超90万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674804011129533783, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597187332, eventDescription: 美国确诊超513万死亡超16.4万例超38万儿童感染, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674772303894901158, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597186109, eventDescription: 一文读懂全球疫情全球累计确诊超2048万例 俄罗斯注册首款新冠疫苗, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674771802796395216, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597108712, eventDescription: 全球2000万+!确诊数前三的国家,疫情形势怎么样了?, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674689865386982063, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597099802, eventDescription: 一文读懂全球疫情全球累计确诊超2021万例 巴西90万人在疫情期间成功戒烟, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674681138323257988, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597098897, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4.1万例 累计超524万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674680199047536719, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597098540, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.2万例 累计超305万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674679503309357561, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597044840, eventDescription: 「战疫全时区」俄罗斯新增5118例确诊病例 累计超89万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674623132178961292, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597041465, eventDescription: 返校前夕全美近10万学生两周内感染新冠肺炎, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674620237736618467, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597013216, eventDescription: 一文读懂全球疫情全球累计确诊超1999万例 美国暴发沙门氏菌疫情, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674590351892834223, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597012823, eventDescription: 「战疫全时区」美国单日新增确诊病例逾4万例 累计超519万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589542624222920, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1597012711, eventDescription: 「战疫全时区」巴西单日新增确诊病例逾2.3万例 累计超303万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674589406966244222, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596961970, eventDescription: 首批哈萨克斯坦华人回国 隔离14天后可回家 亲历者提前12小时就到机场, siteName: 封面新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674536083513406606, homepageUrl: https://baijiahao.baidu.com/u?app_id=1577667706397024, item_avatar: http://pic.rmb.bdstatic.com/7d15a50cee51b0a96e00a85cfabbb513.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596960850, eventDescription: 日本连续5天新增逾千例 安倍:极力避免再次进入紧急状态, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674535368426252580, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596960494, eventDescription: 美国新冠确诊病例接近500万例, siteName: 新华社客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674534439138090155, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552507899985619, item_avatar: http://pic.rmb.bdstatic.com/f873c8976c274e5f9bacc77a5c1b3e89.jpeg@c_1,w_385,h_385,x_6,y_11\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596953765, eventDescription: 印度第3位部长级官员确诊感染新冠肺炎 身兼多份国务要职, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674527459855975038, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596942761, eventDescription: 印度安德拉邦一新冠肺炎征用酒店起火 已致4人死亡, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674515947367591821, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596931253, eventDescription: 警惕!日本研究发现变异新冠病毒,已向全国各地扩散, siteName: 北晚新视觉网, eventUrl: http://baijiahao.baidu.com/s?id=1674503882559884129, homepageUrl: https://baijiahao.baidu.com/u?app_id=1549941228125394, item_avatar: http://pic.rmb.bdstatic.com/b9279adf974b78d27201a0b34970c2a9.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596927420, eventDescription: 一文读懂全球疫情全球累计确诊超1977万例 印度连续10天日增病例超5万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674500137134331393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596926700, eventDescription: 「战疫全时区」美国单日新增确诊病例超6.1万例 累计逾514万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674499173102689654, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596926340, eventDescription: 「战疫全时区」巴西累计确诊病例超301万例 死亡逾10万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674498936636405454, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596871924, eventDescription: 「战疫全时区」俄罗斯新增5212例确诊病例 累计确诊超88万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674442809541789098, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596860880, eventDescription: 「战疫全时区」印度日增超6.1万例 累计确诊超208万, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674430048790588388, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596840442, eventDescription: 一文读懂全球疫情全球累计确诊近1950万例 印度累计确诊超过200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408972047141586, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596839886, eventDescription: 「战疫全时区」巴西单日新增确诊病例超4.4万例 累计逾296万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674408470860851393, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596839434, eventDescription: 「战疫全时区」美国单日新增确诊病例超6万例 累计逾508万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674407841000149898, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596817404, eventDescription: 20天印度新冠确诊病例从100万增至200万, siteName: 北京日报客户端, eventUrl: http://baijiahao.baidu.com/s?id=1674384907923543549, homepageUrl: https://baijiahao.baidu.com/u?app_id=1601149438053974, item_avatar: http://pic.rmb.bdstatic.com/814a110b34d52c106f9ba99c5d415478.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596801592, eventDescription: 8月7日全球疫情观察至少22国日增确诊超千例 印度累计确诊逾200万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674368083759315686, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596786000, eventDescription: 「战疫全时区」俄罗斯新增5241例确诊病例 累计确诊超87万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674351790374377698, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596783247, eventDescription: 白俄罗斯总统卢卡申科:有人故意将新冠病毒传染给我, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674348658542718182, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596769135, eventDescription: 印度成全球第三个确诊破200万的国家21天新增100万, siteName: 新京报, eventUrl: http://baijiahao.baidu.com/s?id=1674333884198078936, homepageUrl: https://baijiahao.baidu.com/u?app_id=1566453612428800, item_avatar: http://pic.rmb.bdstatic.com/9da74a517eb1befeba93a5f3167cc74b.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596760866, eventDescription: 世卫组织:已有六种新冠疫苗进入三期临床试验阶段 其中三种来自中国, siteName: 中国日报网, eventUrl: http://baijiahao.baidu.com/s?id=1674325335422513595, homepageUrl: https://baijiahao.baidu.com/u?app_id=1567805706555546, item_avatar: http://pic.rmb.bdstatic.com/8f27cc4a0abf470446a58a0066b710f5.png\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753951, eventDescription: 一文读懂全球疫情全球累计确诊近1920万例 美国一州长为特朗普接机前确诊, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318314838065202, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753846, eventDescription: 「战疫全时区」巴西单日新增确诊病例超5.3万例 累计逾291万例, siteName: 人民资讯, eventUrl: http://baijiahao.baidu.com/s?id=1674317758867433207, homepageUrl: https://baijiahao.baidu.com/u?app_id=1669728810290752, item_avatar: http://pic.rmb.bdstatic.com/bjh/user/867084918f0beae5baa88b1fcbc34f1f.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596753788, eventDescription: 「战疫全时区」美国单日新增确诊病例超5.9万例 累计逾502万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674318066095349625, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596723779, eventDescription: 今日疫情汇总:黎巴嫩爆炸恐加剧疫情传播,朝鲜首例疑似检测“没结果”, siteName: 纵相新闻, eventUrl: http://baijiahao.baidu.com/s?id=1674286560061876250, homepageUrl: https://baijiahao.baidu.com/u?app_id=1607773795161133, item_avatar: http://pic.rmb.bdstatic.com/4221a08a04ad0d69f15b15252297b8e2.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n",
"eventTime: 1596708377, eventDescription: 全球新冠肺炎确诊病例超1900万例 死亡病例逾71.1万例, siteName: 人民日报海外网, eventUrl: http://baijiahao.baidu.com/s?id=1674270432508007457, homepageUrl: https://baijiahao.baidu.com/u?app_id=1552864910655429, item_avatar: http://pic.rmb.bdstatic.com/d2f919e031c47d671a5748b5aeafe096.jpeg\n",
"++++++++++++++++++++++++++++++++++++++++++++++++++\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:20: DeprecationWarning: invalid escape sequence '\\/'\n"
]
}
],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import re \n",
"import json \n",
"# 定义实时国外新闻类\n",
"class outsideNews: \n",
" def __init__(self): \n",
" insidenews.eventTime = ''\n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" \n",
"# Download the JSONP feed. The timeout stops a stalled connection from hanging\n",
"# the kernel, and raise_for_status() reports HTTP errors instead of parsing an\n",
"# error page.\n",
"response = requests.get('https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group', timeout=10)\n",
"response.raise_for_status()\n",
"\n",
"# Extract the JSON text of every news item straight from the response body.\n",
"# The text stays escaped on purpose: json.loads() below decodes the JSON escape\n",
"# sequences itself, so no separate 'unicode-escape' decoding pass or HTML\n",
"# parsing step is needed.\n",
"results = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})', response.text)\n",
"\n",
"# Build one outsideNews object per extracted item\n",
"all_outsidenews = []\n",
"for item in results:\n",
"    outsidenews = outsideNews()\n",
"    itemjson = json.loads(item)\n",
"    outsidenews.eventTime = itemjson['eventTime']\n",
"    outsidenews.eventDescription = itemjson['eventDescription']\n",
"    outsidenews.siteName = itemjson['siteName']\n",
"    outsidenews.eventUrl = itemjson['eventUrl']\n",
"    outsidenews.homepageUrl = itemjson['homepageUrl']\n",
"    outsidenews.item_avatar = itemjson['item_avatar']\n",
"    all_outsidenews.append(outsidenews)\n",
"\n",
"# Print every record followed by a separator line\n",
"for outsidenews in all_outsidenews:\n",
"    print(outsidenews)\n",
"    print('+++++' * 10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 将获取到的国内外实时新闻数据导入数据库中"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 数据库实体类"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import pymysql\n",
"\n",
"class MyDB:\n",
" def __init__(self, host, user, passwd, db):\n",
" self.conn = pymysql.connect(host, user, passwd, db)\n",
" self.cursor = self.conn.cursor()\n",
" \n",
" def get_insideNews_list_tuple(self, all_insideNews):\n",
" info_tuple = []\n",
" for item in all_insideNews:\n",
" info_tuple.append(item.get_info_tuple())\n",
" return info_tuple\n",
" \n",
" # 保存国内新闻数据\n",
" def save_insideNews_datas(self, all_insideNews):\n",
" sql = 'insert into insideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n",
" res = self.get_insideNews_list_tuple(all_insideNews)\n",
" \n",
" print('+++ save_insideNews_datas, data len: %d' % len(res))\n",
" try:\n",
" self.cursor.executemany(sql, res)#批处理插入函数res为元组类型其中还是元组类型\n",
" self.conn.commit()\n",
" except Exception as e:\n",
" print(e)\n",
" print('+++ save_insideNews_datas is over.')\n",
" \n",
" \n",
" \n",
" def get_outsideNews_list_tuple(self, all_outsideNews):\n",
" info_tuple = []\n",
" for item in all_outsideNews:\n",
" info_tuple.append(item.get_info_tuple())\n",
" return info_tuple\n",
" \n",
" #保存国外新闻数据\n",
" def save_outsideNews_datas(self, all_outsideNews):\n",
" sql = 'insert into outsideNews_daily_datas(eventTime, eventDescription, siteName, eventUrl, homepageUrl, item_avatar) values(%s,%s,%s,%s,%s,%s)'\n",
" res = self.get_outsideNews_list_tuple(all_outsideNews)\n",
" \n",
" print('+++ save_outsideNews_datas, data len: %d' % len(res))\n",
" try:\n",
" self.cursor.executemany(sql, res)#批处理插入函数res为元组类型其中还是元组类型\n",
" self.conn.commit()\n",
" except Exception as e:\n",
" print(e)\n",
" print('+++ save_outsideNews_datas is over.')\n",
" \n",
" def __del__(self):\n",
" if self.conn is not None:\n",
" self.conn.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 国内新闻"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+++ save_insideNews_datas, data len: 37\n",
"+++ save_insideNews_datas is over.\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
" \n",
"\n",
"# 定义实时国内新闻类\n",
"class InsideNews: \n",
" def __init__(self): \n",
" self.nid = ''\n",
" self.thread_id = ''\n",
" self.eventTime = '' \n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" def get_info_tuple(self):\n",
" return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n",
" \n",
"class DataService:\n",
" def __init__(self):\n",
" self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E8%82%BA%E7%82%8E&cb=jsonp_1597231600668_30527&qq-pf-to=pcqq.group'\n",
" self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n",
" \n",
" #抓取网页\n",
" def fetch_html_page(self):\n",
" res = requests.get(self.url)#发送get请求获得目标服务器响应\n",
" res = res.content.decode('unicode-escape')#解码\n",
" return res\n",
" \n",
" #解析网页\n",
" def parse_target_page(self, html):\n",
" soup = BeautifulSoup(html)#构建soup对象\n",
" tag = soup.find('p')#查找指定标签\n",
" tagStr = tag.string#转换为字符串\n",
" \n",
" #使用正则表达式查找所有内容结果返回为list类型(即获取国内实时新闻数据)\n",
" self.insideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n",
" \n",
" def fetch_insideNews_datas(self):\n",
" all_insideNews = []\n",
" for item in self.insideNews:\n",
" insideNews = InsideNews() \n",
" itemjson = json.loads(item) \n",
" insideNews.eventTime = itemjson['eventTime']\n",
" insideNews.eventDescription = itemjson['eventDescription'] \n",
" insideNews.siteName = itemjson['siteName'] \n",
" insideNews.eventUrl = itemjson['eventUrl'] \n",
" insideNews.homepageUrl = itemjson['homepageUrl'] \n",
" insideNews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_insideNews.append(insideNews)\n",
" return all_insideNews\n",
" \n",
" #提取内容生成对象\n",
" def fetch_page_datas(self):\n",
" all_insideNews = self.fetch_insideNews_datas()\n",
" \n",
" # for item in all_insideNews:\n",
" # print(item)\n",
" # print(\"+++++\"*10)\n",
" \n",
" return all_insideNews\n",
" \n",
" #业务函数\n",
" def process_data(self):\n",
" html = self.fetch_html_page()\n",
" self.parse_target_page(html)\n",
" all_insideNews = self.fetch_page_datas()\n",
" \n",
" #保存国内实时播报新闻数据\n",
" self.db.save_insideNews_datas(all_insideNews)\n",
" \n",
"# Build the service and run the fetch -> parse -> save pipeline once\n",
"ds = DataService()\n",
"ds.process_data()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 国外新闻"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: DeprecationWarning: invalid escape sequence '\\/'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"+++ save_outsideNews_datas, data len: 40\n",
"+++ save_outsideNews_datas is over.\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"from bs4 import BeautifulSoup\n",
"import json\n",
" \n",
"\n",
"# 定义实时国内新闻类\n",
"class OutsideNews: \n",
" def __init__(self): \n",
" self.nid = ''\n",
" self.thread_id = ''\n",
" self.eventTime = '' \n",
" self.eventDescription = '' \n",
" self.siteName = '' \n",
" self.eventUrl = '' \n",
" self.homepageUrl = ''\n",
" self.item_avatar = ''\n",
" \n",
" def __str__(self): \n",
" return 'eventTime: %s, eventDescription: %s, siteName: %s, eventUrl: %s, homepageUrl: %s, item_avatar: %s' % (self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar)\n",
"\n",
" def get_info_tuple(self):\n",
" return ((self.eventTime, self.eventDescription, self.siteName, self.eventUrl, self.homepageUrl, self.item_avatar))\n",
" \n",
"class DataService:\n",
" def __init__(self):\n",
" self.url = 'https://opendata.baidu.com/data/inner?tn=reserved_all_res_tn&dspName=iphone&from_sf=1&dsp=iphone&resource_id=28565&alr=1&query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E%E5%9B%BD%E5%A4%96%E7%96%AB%E6%83%85&cb=jsonp_1597232049116_92879&qq-pf-to=pcqq.group'\n",
" self.db = MyDB('localhost', 'root', '2000721zsf', 'covid19_datas_guangxi')\n",
" \n",
" #抓取网页\n",
" def fetch_html_page(self):\n",
" res = requests.get(self.url)#发送get请求获得目标服务器响应\n",
" res = res.content.decode('unicode-escape')#解码\n",
" return res\n",
" \n",
" #解析网页\n",
" def parse_target_page(self, html):\n",
" soup = BeautifulSoup(html)#构建soup对象\n",
" tag = soup.find('p')#查找指定标签\n",
" tagStr = tag.string#转换为字符串\n",
" \n",
" #使用正则表达式查找所有内容结果返回为list类型(即获取国内实时新闻数据)\n",
" self.outsideNews = re.findall(r'(\\{\"bjh_na\".*?\"eventDescription\":.*?\\})',tagStr)\n",
" \n",
" def fetch_outsideNews_datas(self):\n",
" all_outsideNews = []\n",
" for item in self.outsideNews:\n",
" outsideNews = OutsideNews() \n",
" itemjson = json.loads(item) \n",
" outsideNews.eventTime = itemjson['eventTime']\n",
" outsideNews.eventDescription = itemjson['eventDescription'] \n",
" outsideNews.siteName = itemjson['siteName'] \n",
" outsideNews.eventUrl = itemjson['eventUrl'] \n",
" outsideNews.homepageUrl = itemjson['homepageUrl'] \n",
" outsideNews.item_avatar = itemjson['item_avatar'] \n",
" \n",
" all_outsideNews.append(outsideNews)\n",
" return all_outsideNews\n",
" \n",
" #提取内容生成对象\n",
" def fetch_page_datas(self):\n",
" all_outsideNews = self.fetch_outsideNews_datas()\n",
" \n",
" # for item in all_insideNews:\n",
" # print(item)\n",
" # print(\"+++++\"*10)\n",
" \n",
" return all_outsideNews\n",
" \n",
" #业务函数\n",
" def process_data(self):\n",
" html = self.fetch_html_page()\n",
" self.parse_target_page(html)\n",
" all_outsideNews = self.fetch_page_datas()\n",
" \n",
" #保存国内实时播报新闻数据\n",
" self.db.save_outsideNews_datas(all_outsideNews)\n",
" \n",
"# Build the service and run the fetch -> parse -> save pipeline once\n",
"ds = DataService()\n",
"ds.process_data()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}