{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DXY COVID-19 news timeline scraper\n",
    "\n",
    "Downloads the DXY pneumonia page, pulls the news timeline out of the `getTimelineService1` `<script>` element, and prints each item's id, publication time, title, summary and source.\n",
    "\n",
    "- Needs network access plus the `requests` and `beautifulsoup4` packages.\n",
    "- `pubDate` is rendered in the local time zone of the machine running the kernel.\n",
    "- The output saved under **Results** is a snapshot captured on 2020-08-12; run all cells from a fresh kernel to refresh it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import re\n",
    "import time\n",
    "from dataclasses import dataclass\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page that embeds the news timeline inside a <script> element.\n",
    "PAGE_URL = 'http://ncov.dxy.cn/ncovh5/view/pneumonia'\n",
    "# id attribute of the <script> element that carries the timeline.\n",
    "TIMELINE_SCRIPT_ID = 'getTimelineService1'\n",
    "# One match per news item: from {\"id\" up to the first } that follows \"provinceId\".\n",
    "NEWS_ITEM_PATTERN = re.compile(r'(\\{\"id\".*?\"provinceId\".*?\\})')\n",
    "# Seconds to wait for the server; without a timeout requests can block indefinitely.\n",
    "REQUEST_TIMEOUT = 10\n",
    "# strftime layout used for NEWS.pubDate.\n",
    "DATE_FORMAT = '%Y-%m-%d %H:%M:%S'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass(eq=False)  # eq=False keeps identity-based ==/hash, like a plain class\n",
    "class NEWS:\n",
    "    \"\"\"One news item taken from the page's timeline.\n",
    "\n",
    "    Every field has a default, so ``NEWS()`` still creates an empty item whose\n",
    "    attributes can be assigned afterwards.\n",
    "    \"\"\"\n",
    "\n",
    "    id: int = 0\n",
    "    pubDate: str = ''      # publication time; parse_news fills it using DATE_FORMAT\n",
    "    pubDateStr: str = ''   # relative publication time exactly as the page supplies it\n",
    "    title: str = ''\n",
    "    summary: str = ''\n",
    "    infoSource: str = ''\n",
    "\n",
    "    def __str__(self):\n",
    "        \"\"\"Return the six fields as ``name:value`` lines, each ending in a newline.\"\"\"\n",
    "        return 'id:%d\\npubDate:%s\\npubDateStr:%s\\ntitle:%s\\nsummary:%s\\ninfoSource:%s\\n' % (\n",
    "            self.id,\n",
    "            self.pubDate,\n",
    "            self.pubDateStr,\n",
    "            self.title,\n",
    "            self.summary,\n",
    "            self.infoSource,\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetch and parse\n",
    "\n",
    "The helpers are defined in one cell and called in the next, so the parsing step can be re-run without downloading the page again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_page(url=PAGE_URL, timeout=REQUEST_TIMEOUT):\n",
    "    \"\"\"Download ``url`` and return its body decoded as UTF-8.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: on connection errors, timeouts, or a 4xx/5xx status.\n",
    "    \"\"\"\n",
    "    response = requests.get(url, timeout=timeout)\n",
    "    response.raise_for_status()\n",
    "    # Decode the raw bytes explicitly instead of relying on the charset requests guesses.\n",
    "    return response.content.decode('utf-8')\n",
    "\n",
    "\n",
    "def extract_timeline_script(html, script_id=TIMELINE_SCRIPT_ID):\n",
    "    \"\"\"Return the text inside the ``<script>`` element whose id is ``script_id``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the element is missing or has no single text child.\n",
    "    \"\"\"\n",
    "    soup = BeautifulSoup(html, 'html.parser')\n",
    "    tag = soup.find('script', attrs={'id': script_id})\n",
    "    # find() returns None when nothing matches, and .string is None unless the\n",
    "    # tag has exactly one text child; fail with a clear message in both cases.\n",
    "    if tag is None or tag.string is None:\n",
    "        raise ValueError(f'<script id={script_id!r}> element with text content not found')\n",
    "    return tag.string\n",
    "\n",
    "\n",
    "def format_pub_date(epoch_ms):\n",
    "    \"\"\"Format a millisecond Unix timestamp as local time using DATE_FORMAT.\"\"\"\n",
    "    return time.strftime(DATE_FORMAT, time.localtime(float(epoch_ms) / 1000))\n",
    "\n",
    "\n",
    "def parse_news(script_text):\n",
    "    \"\"\"Build a list of NEWS objects from the timeline script text.\n",
    "\n",
    "    Each regex match is decoded as a standalone JSON object.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: if a matched fragment is not valid JSON.\n",
    "        KeyError: if a decoded item lacks one of the expected keys.\n",
    "    \"\"\"\n",
    "    items = []\n",
    "    for fragment in NEWS_ITEM_PATTERN.findall(script_text):\n",
    "        record = json.loads(fragment)\n",
    "        items.append(\n",
    "            NEWS(\n",
    "                id=record['id'],\n",
    "                pubDate=format_pub_date(record['pubDate']),\n",
    "                pubDateStr=record['pubDateStr'],\n",
    "                title=record['title'],\n",
    "                summary=record['summary'],\n",
    "                infoSource=record['infoSource'],\n",
    "            )\n",
    "        )\n",
    "    return items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "page_html = fetch_page()\n",
    "timeline_text = extract_timeline_script(page_html)\n",
    "all_New = parse_news(timeline_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id:74894\n",
      "pubDate:2020-08-12 17:17:01\n",
      "pubDateStr:9分钟前\n",
      "title:以色列新增366例新冠肺炎确诊病例 累计86959例\n",
      "summary:当地时间8月12日上午,以色列卫生部公布的疫情通报显示,11日23时至12日11时,以色列新增366例新冠肺炎确诊病例,累计达86959例,无新增死亡病例,累计死亡622人。截至目前,现存确诊病例24761例,有377人病情危重,其中110人使用呼吸机,已有61576人治愈康复。\n",
      "infoSource:央视新闻app\n",
      "\n",
      "\n",
      "id:74891\n",
      "pubDate:2020-08-12 17:14:28\n",
      "pubDateStr:11分钟前\n",
      "title:巴林新增407例新冠肺炎确诊病例 累计44804例\n",
      "summary:巴林卫生部当地时间12日上午发布通告,确认境内新增407例新型冠状病毒肺炎确诊病例,同时新增2例死亡病例。至此巴林境内累计已发现44804例新型冠状病毒肺炎确诊病例,其中41504名患者已经治愈,165名患者病重不治去世。3135名仍在医院接受治疗的患者中38人病情较重。\n",
      "infoSource:央视新闻app\n",
      "\n",
      "\n",
      "id:74884\n",
      "pubDate:2020-08-12 16:46:46\n",
      "pubDateStr:39分钟前\n",
      "title:香港新增62例新冠肺炎确诊病例 连续第十日新增病例少于100例\n",
      "summary:8月12日下午,香港特区政府卫生署卫生防护中心举行发布会。据介绍,截至12日零时,香港新增62例新冠肺炎确诊病例。这是连续第十日新增确诊病例少于100例。香港累计报告新冠肺炎确诊病例4243例。  新增确诊者中,有1例境外输入病例,剩余61例均为本地感染病例,当中有28例病例暂未查明感染源。12日,香港新增四例死亡病例,累计死亡63例。\n",
      "infoSource:央视新闻app\n",
      "\n",
      "\n",
      "id:74878\n",
      "pubDate:2020-08-12 16:30:22\n",
      "pubDateStr:55分钟前\n",
      "title:菲律宾新增4444例新冠肺炎确诊病例 累计确诊143749例\n",
      "summary:8月12日,菲律宾卫生部发布消息称,截至12日下午4点,菲律宾新增4444例新冠肺炎确诊病例,确诊病例累计达143749例。此外,新增死亡病例93例,累计死亡2404例。同时,新增治愈病例636例,累计治愈人数为68997人。\n",
      "infoSource:央视新闻app\n",
      "\n",
      "\n",
      "id:74869\n",
      "pubDate:2020-08-12 15:52:13\n",
      "pubDateStr:1小时前\n",
      "title:捷克新增288例新冠肺炎确诊病例 累计18783例\n",
      "summary:当地时间8月12日,捷克卫生部公布的新冠肺炎疫情数据显示,11日全天捷克新增288例新冠肺炎确诊病例,累计确诊18783例;治愈病例和死亡病例分别为13222例和391例。\n",
      "infoSource:央视新闻app\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "if not all_New:\n",
    "    print('No news items found - the page layout may have changed.')\n",
    "for item in all_New:\n",
    "    # str(item) already ends with a newline, so the bare print() below\n",
    "    # leaves two blank lines between consecutive items.\n",
    "    print(item)\n",
    "    print()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}