{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Overseas COVID-19 statistics (DXY)\n",
    "\n",
    "Downloads the DXY pneumonia dashboard page, pulls the statistics JSON out of the\n",
    "`getStatisticsService` script tag and loads the ten counters into a\n",
    "`ForeignStatistics` object.\n",
    "\n",
    "The page is fetched live on every run, so the printed numbers change over time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import re\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page whose HTML embeds the statistics JSON in a <script> tag.\n",
    "DXY_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'\n",
    "\n",
    "# id attribute of the <script> tag that carries the statistics.\n",
    "STATISTICS_SCRIPT_ID = 'getStatisticsService'\n",
    "\n",
    "# Seconds to wait for the server; requests waits indefinitely when no timeout is given.\n",
    "REQUEST_TIMEOUT = 10\n",
    "\n",
    "# Non-greedy match of a JSON object that starts with \"currentConfirmedCount\" and ends at\n",
    "# the first \"}\" after \"deadIncr\". Raw string so that \\{ and \\} reach the regex engine\n",
    "# instead of being treated as (invalid) string escapes.\n",
    "# NOTE(review): the first such object in the script is presumably the overseas block;\n",
    "# verify if the page layout changes.\n",
    "STATISTICS_PATTERN = re.compile(r'\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ForeignStatistics:\n",
    "    \"\"\"Real-time overview of the overseas epidemic counters.\n",
    "\n",
    "    Each name in ``FIELDS`` is an instance attribute that starts at 0.\n",
    "    ``FIELDS`` also fixes the order used by the tuple and string forms.\n",
    "    \"\"\"\n",
    "\n",
    "    FIELDS = (\n",
    "        'currentConfirmedCount',\n",
    "        'confirmedCount',\n",
    "        'suspectedCount',\n",
    "        'curedCount',\n",
    "        'deadCount',\n",
    "        'suspectedIncr',\n",
    "        'currentConfirmedIncr',\n",
    "        'confirmedIncr',\n",
    "        'curedIncr',\n",
    "        'deadIncr',\n",
    "    )\n",
    "\n",
    "    def __init__(self):\n",
    "        for name in self.FIELDS:\n",
    "            setattr(self, name, 0)\n",
    "\n",
    "    @classmethod\n",
    "    def from_dict(cls, data):\n",
    "        \"\"\"Build an instance from a mapping holding every name in ``FIELDS``.\n",
    "\n",
    "        Args:\n",
    "            data: mapping from field name to a value accepted by ``int()``.\n",
    "\n",
    "        Returns:\n",
    "            A new instance whose attributes are the ``int()`` of each value.\n",
    "\n",
    "        Raises:\n",
    "            KeyError: a field name is missing from ``data``.\n",
    "            ValueError, TypeError: a value cannot be converted by ``int()``.\n",
    "        \"\"\"\n",
    "        stats = cls()\n",
    "        for name in cls.FIELDS:\n",
    "            setattr(stats, name, int(data[name]))\n",
    "        return stats\n",
    "\n",
    "    def get_foreign_statistics_tuple(self):\n",
    "        \"\"\"Return the counters as a tuple ordered like ``FIELDS``.\"\"\"\n",
    "        return tuple(getattr(self, name) for name in self.FIELDS)\n",
    "\n",
    "    def __str__(self):\n",
    "        \"\"\"Return ``name:value`` pairs joined by ``', '``, ordered like ``FIELDS``.\"\"\"\n",
    "        return ', '.join('%s:%s' % (name, getattr(self, name)) for name in self.FIELDS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetching and parsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_statistics(script_text):\n",
    "    \"\"\"Return the first object matched by ``STATISTICS_PATTERN`` as a dict.\n",
    "\n",
    "    Args:\n",
    "        script_text: text content of the statistics <script> tag; ``None`` is\n",
    "            treated like an empty string.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: no statistics object is present, or the matched text is\n",
    "            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).\n",
    "    \"\"\"\n",
    "    match = STATISTICS_PATTERN.search(script_text or '')\n",
    "    if match is None:\n",
    "        raise ValueError('no statistics JSON found in the script text')\n",
    "    return json.loads(match.group(0))\n",
    "\n",
    "\n",
    "def fetch_foreign_statistics(url=DXY_URL, timeout=REQUEST_TIMEOUT):\n",
    "    \"\"\"Download ``url`` and return its statistics as a ``ForeignStatistics``.\n",
    "\n",
    "    Args:\n",
    "        url: page to download.\n",
    "        timeout: seconds passed to ``requests.get`` as its timeout.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: the request failed, timed out, or the\n",
    "            server answered with an HTTP error status.\n",
    "        ValueError: the statistics <script> tag or its JSON is missing.\n",
    "    \"\"\"\n",
    "    response = requests.get(url, timeout=timeout)\n",
    "    # Fail here on 4xx/5xx instead of trying to parse an error page.\n",
    "    response.raise_for_status()\n",
    "    # Decode the raw bytes explicitly as UTF-8 rather than relying on response.text.\n",
    "    page_html = response.content.decode('utf-8')\n",
    "\n",
    "    soup = BeautifulSoup(page_html, 'html.parser')\n",
    "    script_tag = soup.find('script', attrs={'id': STATISTICS_SCRIPT_ID})\n",
    "    if script_tag is None:\n",
    "        raise ValueError('no <script id=\"%s\"> tag found at %s' % (STATISTICS_SCRIPT_ID, url))\n",
    "\n",
    "    return ForeignStatistics.from_dict(extract_statistics(script_tag.string))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Current figures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "foreignStatistics = fetch_foreign_statistics()\n",
    "# print() is intentional: the class defines __str__ only, so a bare expression\n",
    "# would display the default object repr.\n",
    "print(foreignStatistics)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example output\n",
    "\n",
    "Output captured from an earlier run of this notebook:\n",
    "\n",
    "```\n",
    "currentConfirmedCount:7543877, confirmedCount:20432235, suspectedCount:4, curedCount:12145092, deadCount:743266, suspectedIncr:0, currentConfirmedIncr:6954, confirmedIncr:14410, curedIncr:6504, deadIncr:952\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}