From 1ff0fc3e3f81a48023c76a8234d0144406acc451 Mon Sep 17 00:00:00 2001 From: priest05 <1844628207@qq.com> Date: Thu, 13 Aug 2020 18:36:26 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=BC=E5=85=A5=E6=95=B0=E6=8D=AE=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 存储.ipynb | 183 +++++++++++++++++++++++++++ 爬取国外疫情实时概括.ipynb | 124 ------------------ 2 files changed, 183 insertions(+), 124 deletions(-) create mode 100644 存储.ipynb delete mode 100644 爬取国外疫情实时概括.ipynb diff --git a/存储.ipynb b/存储.ipynb new file mode 100644 index 0000000..73b20f7 --- /dev/null +++ b/存储.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 定义实体类" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# 国外疫情实时概况\n", + "class OutsideSummary:\n", + " def __init__(self):\n", + " self.currentConfirmedCount = 0\n", + " self.confirmedCount = 0\n", + " self.suspectedCount = 0\n", + " self.curedCount = 0\n", + " self.deadCount = 0\n", + " self.suspectedIncr = 0\n", + " self.currentConfirmedIncr = 0\n", + " self.confirmedIncr = 0\n", + " self.curedIncr = 0\n", + " self.deadIncr = 0\n", + "\n", + " def get_info_tuple(self):\n", + " return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount,\n", + " self.deadCount, self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,\n", + " self.deadIncr)\n", + " def __str__(self):\n", + " return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s' % (\n", + " self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount, self.deadCount,\n", + " self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 数据库实体类" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#数据库实体类\n", + "import pymysql\n", + "class MyDB:\n", + " def __init__(self, host, user, passwd, db):\n", + " self.conn = pymysql.connect(host, user, passwd, db)\n", + " self.cursor = self.conn.cursor()\n", + " def get_outsideSummary_list_tuple(self, outsideSummary):\n", + " info_tuple = []\n", + " info_tuple.append(outsideSummary.get_info_tuple())\n", + " return info_tuple\n", + " # 保存数据\n", + " def save_outsideSummary_datas(self, outsideSummary):\n", + " sql = 'insert into outsideSummary_realtime_datas(currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n", + " res = self.get_outsideSummary_list_tuple(outsideSummary)\n", + " print('+++ save_outsideSummary_datas, data len: %d' % len(res))\n", + " try:\n", + " self.cursor.executemany(sql, res)\n", + " self.conn.commit()\n", + " except Exception as e:\n", + " print(e)\n", + " print('+++ save_outsideSummary_datas is over.')\n", + " def __del__(self):\n", + " if self.conn is not None:\n", + " self.conn.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 业务逻辑类" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+++ save_outsideSummary_datas, data len: 1\n", + "+++ save_outsideSummary_datas is over.\n" + ] + } + ], + "source": [ + "#业务逻辑类\n", + "import requests\n", + "import re\n", + "from bs4 import BeautifulSoup\n", + "import json\n", + "class DataService:\n", + " def __init__(self):\n", + " self.db = MyDB('localhost', 'root', 'liyujiao0352','covid19_datas_guangxi')\n", + " \n", + "\n", + "# 爬取页面\n", + "res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')\n", + "# 重新解码\n", + "res = res.content.decode('utf-8')\n", + "# 构建soup对象\n", + "soup = BeautifulSoup(res, 'html.parser')\n", + "# 使用soup对象查找国外疫情数据标签\n", + "tag = soup.find('script', attrs={'id': 'getStatisticsService'})\n", + "# 转成字符串\n", + "tagstr = tag.string\n", + "# 使用正则表达式查找所有内容\n", + "result = re.findall('\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}', tagstr)\n", + "# 获取国外疫情数据\n", + "#print(result[0])\n", + "obj = json.loads(result[0])\n", + "#print(obj)\n", + "def fetch_outside_summary(obj):\n", + " outsideSummary = OutsideSummary()\n", + " outsideSummary.currentConfirmedCount = int(obj['currentConfirmedCount'])\n", + " outsideSummary.confirmedCount = int(obj['confirmedCount'])\n", + " outsideSummary.suspectedCount = int(obj['suspectedCount'])\n", + " outsideSummary.curedCount = int(obj['curedCount'])\n", + " outsideSummary.deadCount = int(obj['deadCount'])\n", + " outsideSummary.suspectedIncr = int(obj['suspectedIncr'])\n", + " outsideSummary.currentConfirmedIncr = int(obj['currentConfirmedIncr'])\n", + " outsideSummary.confirmedIncr = int(obj['confirmedIncr'])\n", + " outsideSummary.curedIncr = int(obj['curedIncr'])\n", + " outsideSummary.deadIncr = int(obj['deadIncr'])\n", + " return outsideSummary\n", + " \n", + "# 创建Dataservice对象\n", + "ds = DataService()\n", + "outsideSummary=fetch_outside_summary(obj)\n", + "ds.db.save_outsideSummary_datas(outsideSummary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/爬取国外疫情实时概括.ipynb b/爬取国外疫情实时概括.ipynb deleted file mode 100644 index 18a92ae..0000000 --- a/爬取国外疫情实时概括.ipynb +++ /dev/null @@ -1,124 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "currentConfirmedCount:7543877, confirmedCount:20432235, suspectedCount:4, curedCount:12145092, deadCount:743266, suspectedIncr:0, currentConfirmedIncr:6954, confirmedIncr:14410, curedIncr:6504, deadIncr:952\n" - ] - } - ], - "source": [ - "import requests\n", - "from bs4 import BeautifulSoup\n", - "import re\n", - "import json\n", - "\n", - "# 国外疫情实时概况\n", - "class ForeignStatistics:\n", - " def __init__(self):\n", - " self.currentConfirmedCount = 0\n", - " self.confirmedCount = 0\n", - " self.suspectedCount = 0\n", - " self.curedCount = 0\n", - " self.deadCount = 0\n", - " self.suspectedIncr = 0\n", - " self.currentConfirmedIncr = 0\n", - " self.confirmedIncr = 0\n", - " self.curedIncr = 0\n", - " self.deadIncr = 0\n", - "\n", - " def get_foreign_statistics_tuple(self):\n", - " return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount,\n", - " self.deadCount, self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,\n", - " self.deadIncr)\n", - "\n", - " def __str__(self):\n", - " return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s' % (\n", - " self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount, self.deadCount,\n", - " self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr)\n", - "\n", - "\n", - "\n", - "\n", - "# 爬取页面\n", - "res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')\n", - "# 重新解码\n", - "res = res.content.decode('utf-8')\n", - "# 构建soup对象\n", - "soup = BeautifulSoup(res, 'html.parser')\n", - "# 使用soup对象查找国外疫情数据标签\n", - "tag = soup.find('script', attrs={'id': 'getStatisticsService'})\n", - "# 转成字符串\n", - "tagstr = tag.string\n", - "# 使用正则表达式查找所有内容\n", - "result = re.findall('\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}', tagstr)\n", - "# 获取国外疫情数据\n", - "#print(result[0])\n", - "\n", - "obj = json.loads(result[0])\n", - "#print(obj)\n", - "foreignStatistics = ForeignStatistics()\n", - "foreignStatistics.currentConfirmedCount = int(obj['currentConfirmedCount'])\n", - "foreignStatistics.confirmedCount = int(obj['confirmedCount'])\n", - "foreignStatistics.suspectedCount = int(obj['suspectedCount'])\n", - "foreignStatistics.curedCount = int(obj['curedCount'])\n", - "foreignStatistics.deadCount = int(obj['deadCount'])\n", - "foreignStatistics.suspectedIncr = int(obj['suspectedIncr'])\n", - "foreignStatistics.currentConfirmedIncr = int(obj['currentConfirmedIncr'])\n", - "foreignStatistics.confirmedIncr = int(obj['confirmedIncr'])\n", - "foreignStatistics.curedIncr = int(obj['curedIncr'])\n", - "foreignStatistics.deadIncr = int(obj['deadIncr'])\n", - "print(foreignStatistics)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}