"""Scrape the real-time overview of the epidemic situation abroad.

The page at ``STATISTICS_URL`` embeds its numbers in a ``<script>`` tag whose
id is ``getStatisticsService``.  This module downloads the page, pulls the
first JSON object that starts with ``"currentConfirmedCount"`` and ends after
``"deadIncr"`` out of that script, and exposes it as a ``ForeignStatistics``.
"""
import json
import re

STATISTICS_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'
STATISTICS_SCRIPT_ID = 'getStatisticsService'

# Raw string: "\{" and "\}" are not valid escapes in a normal string literal.
STATISTICS_PATTERN = re.compile(r'\{"currentConfirmedCount".*?"deadIncr".*?\}')


class ForeignStatistics:
    """Real-time overview counters of the epidemic situation abroad.

    Every name in ``FIELD_NAMES`` is an instance attribute initialised to 0.
    """

    # Single source of truth for attribute names and their reporting order.
    FIELD_NAMES = (
        'currentConfirmedCount',
        'confirmedCount',
        'suspectedCount',
        'curedCount',
        'deadCount',
        'suspectedIncr',
        'currentConfirmedIncr',
        'confirmedIncr',
        'curedIncr',
        'deadIncr',
    )

    def __init__(self):
        for name in self.FIELD_NAMES:
            setattr(self, name, 0)

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a mapping holding every field name.

        Values are converted with ``int()``.

        Raises:
            KeyError: if a field is missing from ``data``.
            ValueError / TypeError: if a value cannot be converted to int.
        """
        stats = cls()
        for name in cls.FIELD_NAMES:
            setattr(stats, name, int(data[name]))
        return stats

    def get_foreign_statistics_tuple(self):
        """Return all counters as a tuple, in ``FIELD_NAMES`` order."""
        return tuple(getattr(self, name) for name in self.FIELD_NAMES)

    def __str__(self):
        """Return ``name:value`` pairs joined by ``', '``."""
        return ', '.join(
            f'{name}:{getattr(self, name)!s}' for name in self.FIELD_NAMES
        )


def parse_foreign_statistics(script_text):
    """Extract the statistics object from the script text and parse it.

    Args:
        script_text: text content of the ``getStatisticsService`` script tag.

    Returns:
        A populated ``ForeignStatistics``.

    Raises:
        ValueError: if no statistics object is found in ``script_text``
            (``json.JSONDecodeError`` is a ``ValueError`` subclass as well).
        KeyError: if the object lacks one of the expected fields.
    """
    match = STATISTICS_PATTERN.search(script_text)
    if match is None:
        raise ValueError('statistics JSON object not found in script text')
    return ForeignStatistics.from_dict(json.loads(match.group(0)))


def fetch_statistics_script(url=STATISTICS_URL, timeout=10):
    """Download the page and return the text of the statistics script tag.

    Args:
        url: page to download.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot hang the caller forever.

    Raises:
        requests.HTTPError: on a 4xx/5xx response.
        ValueError: if the script tag is missing or has no single text child.
    """
    # Imported here so the parsing helpers above stay importable (and
    # testable) on machines without the third-party scraping stack.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Decode the raw bytes explicitly as UTF-8, as the original script did.
    html = response.content.decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find('script', attrs={'id': STATISTICS_SCRIPT_ID})
    if tag is None or tag.string is None:
        raise ValueError(
            'script tag %r not found in %s' % (STATISTICS_SCRIPT_ID, url)
        )
    return tag.string


def fetch_foreign_statistics(url=STATISTICS_URL, timeout=10):
    """Download ``url`` and return the parsed ``ForeignStatistics``."""
    return parse_foreign_statistics(fetch_statistics_script(url, timeout))


if __name__ == '__main__':
    # Still runs when executed as a script or notebook cell, but importing
    # this module no longer triggers network I/O.
    foreignStatistics = fetch_foreign_statistics()
    print(foreignStatistics)