From 2a4872d7cf023d649371551bde106f85be7b13a4 Mon Sep 17 00:00:00 2001 From: priest05 <1844628207@qq.com> Date: Fri, 14 Aug 2020 11:13:31 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=AA=E5=AD=98=E4=B8=80=E7=BB=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 存储.ipynb | 54 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/存储.ipynb b/存储.ipynb index 59c884a..b74ea16 100644 --- a/存储.ipynb +++ b/存储.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -26,15 +26,19 @@ " self.confirmedIncr = 0\n", " self.curedIncr = 0\n", " self.deadIncr = 0\n", - "\n", + " self.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n", + " \n", + " \n", " def get_info_tuple(self):\n", " return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount,\n", " self.deadCount, self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,\n", - " self.deadIncr)\n", + " self.deadIncr, self.updatedTime)\n", + " \n", + " \n", " def __str__(self):\n", - " return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s' % (\n", + " return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s, updatedTime:%s' % (\n", " self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount, self.deadCount,\n", - " self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr)" + " self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr, self.updatedTime)" ] }, { @@ -46,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -56,17 +60,21 @@ " def __init__(self, host, user, passwd, db):\n", " self.conn = pymysql.connect(host, user, passwd, db)\n", " self.cursor = self.conn.cursor()\n", + " \n", + " \n", " def get_outsideSummary_list_tuple(self, outsideSummary):\n", " info_tuple = []\n", " info_tuple.append(outsideSummary.get_info_tuple())\n", " return info_tuple\n", + " \n", + " \n", " # 保存数据\n", " def save_outsideSummary_datas(self, outsideSummary):\n", " print('+++ [MyDB] delete from outsideSummary_realtime_datas') \n", " self.cursor.execute('delete from outsideSummary_realtime_datas') \n", " self.conn.commit()\n", - " \n", - " sql = 'insert into outsideSummary_realtime_datas(currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n", + " \n", + " sql = 'insert into outsideSummary_realtime_datas(currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr,updatedTime) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n", " res = self.get_outsideSummary_list_tuple(outsideSummary)\n", " print('+++ save_outsideSummary_datas, data len: %d' % len(res))\n", " try:\n", @@ -75,6 +83,8 @@ " except Exception as e:\n", " print(e)\n", " print('+++ save_outsideSummary_datas is over.')\n", + " \n", + " \n", " def __del__(self):\n", " if self.conn is not None:\n", " self.conn.close()" @@ -89,8 +99,10 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "execution_count": 32, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -104,10 +116,14 @@ ], "source": [ "#业务逻辑类\n", + "import datetime\n", "import requests\n", "import re\n", "from bs4 import BeautifulSoup\n", "import json\n", + "\n", + "\n", + "\n", "class DataService:\n", " def __init__(self):\n", " self.db = MyDB('localhost', 'root', 'liyujiao0352','covid19_datas_guangxi')\n", @@ -115,19 +131,27 @@ "\n", "# 爬取页面\n", "res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')\n", + "\n", "# 重新解码\n", "res = res.content.decode('utf-8')\n", + "\n", "# 构建soup对象\n", "soup = BeautifulSoup(res, 'html.parser')\n", + "\n", "# 使用soup对象查找国外疫情数据标签\n", "tag = soup.find('script', attrs={'id': 'getStatisticsService'})\n", + "\n", "# 转成字符串\n", "tagstr = tag.string\n", "# 使用正则表达式查找所有内容\n", "result = re.findall('\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}', tagstr)\n", + "\n", "# 获取国外疫情数据\n", "#print(result[0])\n", "obj = json.loads(result[0])\n", + "\n", + "\n", + "\n", "#print(obj)\n", "def fetch_outside_summary(obj):\n", " outsideSummary = OutsideSummary()\n", @@ -141,7 +165,10 @@ " outsideSummary.confirmedIncr = int(obj['confirmedIncr'])\n", " outsideSummary.curedIncr = int(obj['curedIncr'])\n", " outsideSummary.deadIncr = int(obj['deadIncr'])\n", + " outsideSummary.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n", " return outsideSummary\n", + "\n", + "\n", " \n", "# 创建Dataservice对象\n", "ds = DataService()\n", @@ -156,6 +183,13 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null,