{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# COVID-19 per-country daily statistics\n",
    "\n",
    "Scrapes the per-country statistics embedded in the DXY pneumonia page and stores them in the MySQL table `country_daily_datas`. Rows already stored for the current day are replaced, so re-running the notebook on the same day does not create duplicates.\n",
    "\n",
    "Connection settings are read from the environment variables `COVID_DB_HOST`, `COVID_DB_USER`, `COVID_DB_PASSWORD` and `COVID_DB_NAME`; the password is prompted for when `COVID_DB_PASSWORD` is not set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import getpass\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "\n",
    "import pymysql\n",
    "import requests\n",
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SOURCE_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'\n",
    "COUNTRY_SCRIPT_ID = 'getListByCountryTypeService2true'  # id of the <script> tag holding the data\n",
    "# Matches one JSON object per country inside that script tag.\n",
    "COUNTRY_JSON_RE = re.compile(r'\\{\"id\".*?\"showRank\".*?\\}')\n",
    "REQUEST_TIMEOUT = 30  # seconds\n",
    "TIME_FORMAT = '%Y-%m-%d %H:%M:%S'\n",
    "\n",
    "# The scrape timestamp is written to pub_time by INSERT_SQL, so the same\n",
    "# column is used to find the rows that belong to the current day.\n",
    "DELETE_TODAY_SQL = 'delete from country_daily_datas where pub_time like %s'\n",
    "INSERT_SQL = (\n",
    "    'insert into country_daily_datas('\n",
    "    'countryName,currentConfirmedCount,confirmedCount,confirmedCountRank,'\n",
    "    'curedCount,deadCount,deadCountRank,deadRate,deadRateRank,pub_time) '\n",
    "    'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data model and storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Country:\n",
    "    \"\"\"Statistics for one country as published on the source page.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.countryName = ''\n",
    "        self.currentConfirmedCount = 0  # currently confirmed cases\n",
    "        self.confirmedCount = 0  # cumulative confirmed cases\n",
    "        self.confirmedCountRank = 0  # rank by cumulative confirmed cases\n",
    "        self.curedCount = 0  # cumulative recoveries\n",
    "        self.deadCount = 0  # cumulative deaths\n",
    "        self.deadCountRank = 0  # rank by cumulative deaths\n",
    "        self.deadRate = 0.0  # death rate\n",
    "        self.deadRateRank = 0  # rank by death rate\n",
    "        self.updatedTime = datetime.datetime.now().strftime(TIME_FORMAT)\n",
    "\n",
    "    def get_info_tuple(self):\n",
    "        \"\"\"Return the fields as a tuple, in the column order of INSERT_SQL.\"\"\"\n",
    "        return (\n",
    "            self.countryName,\n",
    "            self.currentConfirmedCount,\n",
    "            self.confirmedCount,\n",
    "            self.confirmedCountRank,\n",
    "            self.curedCount,\n",
    "            self.deadCount,\n",
    "            self.deadCountRank,\n",
    "            self.deadRate,\n",
    "            self.deadRateRank,\n",
    "            self.updatedTime,\n",
    "        )\n",
    "\n",
    "    def __str__(self):\n",
    "        # deadRate is a float, so it is rendered with %s rather than being\n",
    "        # truncated to an integer by %d.\n",
    "        return (\n",
    "            'countryName:%s,currentConfirmedCount:%d,confirmedCount:%d,'\n",
    "            'confirmedCountRank:%d,curedCount:%d,deadCount:%d,deadCountRank:%d,'\n",
    "            'deadRate:%s,deadRateRank:%d,updatedTime:%s' % self.get_info_tuple()\n",
    "        )\n",
    "\n",
    "\n",
    "class MyDB:\n",
    "    \"\"\"Small wrapper around one PyMySQL connection and cursor.\"\"\"\n",
    "\n",
    "    def __init__(self, host, user, passwd, db):\n",
    "        # Defined before connecting so close()/__del__ stay safe if connect() raises.\n",
    "        self.conn = None\n",
    "        self.cursor = None\n",
    "        # Keyword arguments: PyMySQL 1.0+ rejects positional connection arguments.\n",
    "        self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)\n",
    "        self.cursor = self.conn.cursor()\n",
    "\n",
    "    def get_country_list_tuple(self, all_country):\n",
    "        \"\"\"Convert Country objects into a list of row tuples for executemany().\"\"\"\n",
    "        return [item.get_info_tuple() for item in all_country]\n",
    "\n",
    "    def save_country_datas(self, all_country):\n",
    "        \"\"\"Replace the current day's rows with the rows built from all_country.\n",
    "\n",
    "        Returns True when the rows were committed, and False when there was\n",
    "        nothing to save or MySQL reported an error.\n",
    "        \"\"\"\n",
    "        today = datetime.datetime.now().strftime('%Y-%m-%d')\n",
    "        rows = self.get_country_list_tuple(all_country)\n",
    "        print('+++ save_country_datas, data len: %d' % len(rows))\n",
    "        if not rows:\n",
    "            # An empty scrape must not wipe the rows already stored for today.\n",
    "            print('+++ save_country_datas: nothing to save.')\n",
    "            return False\n",
    "        try:\n",
    "            # Delete and insert share one commit so a failed insert does not\n",
    "            # leave the day empty (the rollback only helps on transactional tables).\n",
    "            self.cursor.execute(DELETE_TODAY_SQL, (today + '%',))\n",
    "            self.cursor.executemany(INSERT_SQL, rows)\n",
    "            self.conn.commit()\n",
    "        except pymysql.MySQLError as e:\n",
    "            # Only database errors are reported and tolerated here; programming\n",
    "            # errors propagate instead of being hidden.\n",
    "            self.conn.rollback()\n",
    "            print('+++ save_country_datas failed: %s' % (e,))\n",
    "            return False\n",
    "        print('+++ save_country_datas is over.')\n",
    "        return True\n",
    "\n",
    "    def show_country_datas(self):\n",
    "        \"\"\"Return every row of country_daily_datas.\"\"\"\n",
    "        self.cursor.execute('select * from country_daily_datas')\n",
    "        return self.cursor.fetchall()\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Close the connection; calling it more than once is harmless.\"\"\"\n",
    "        conn = getattr(self, 'conn', None)\n",
    "        if conn is not None:\n",
    "            self.conn = None\n",
    "            conn.close()\n",
    "\n",
    "    def __del__(self):\n",
    "        # Fallback only; callers should use close(). Errors are suppressed\n",
    "        # because nothing useful can be done with them during finalization.\n",
    "        try:\n",
    "            self.close()\n",
    "        except Exception:\n",
    "            pass"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scraping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_country(obj, updated_time):\n",
    "    \"\"\"Build a Country from one decoded JSON record (a dict).\"\"\"\n",
    "    country = Country()\n",
    "    country.countryName = obj['provinceName']\n",
    "    country.currentConfirmedCount = int(obj['currentConfirmedCount'])\n",
    "    country.confirmedCount = int(obj['confirmedCount'])\n",
    "    country.curedCount = int(obj['curedCount'])\n",
    "    country.deadCount = int(obj['deadCount'])\n",
    "    country.deadRate = float(obj['deadRate'])\n",
    "    country.updatedTime = updated_time\n",
    "    # Rank keys can be absent from a record; each one falls back to 0 on its own.\n",
    "    country.deadCountRank = int(obj.get('deadCountRank', 0))\n",
    "    country.deadRateRank = int(obj.get('deadRateRank', 0))\n",
    "    country.confirmedCountRank = int(obj.get('confirmedCountRank', 0))\n",
    "    return country\n",
    "\n",
    "\n",
    "def fetch_country_stats():\n",
    "    \"\"\"Download the source page and return a list of Country objects.\n",
    "\n",
    "    Raises requests.HTTPError on a bad HTTP status and RuntimeError when the\n",
    "    expected script tag is missing from the page.\n",
    "    \"\"\"\n",
    "    response = requests.get(SOURCE_URL, timeout=REQUEST_TIMEOUT)\n",
    "    response.raise_for_status()\n",
    "    html = response.content.decode('utf-8')\n",
    "    soup = BeautifulSoup(html, 'html.parser')\n",
    "    tag = soup.find('script', attrs={'id': COUNTRY_SCRIPT_ID})\n",
    "    if tag is None or not tag.string:\n",
    "        raise RuntimeError('script tag %r not found in %s' % (COUNTRY_SCRIPT_ID, SOURCE_URL))\n",
    "    # One timestamp for the whole run so every row of a scrape carries the same value.\n",
    "    updated_time = datetime.datetime.now().strftime(TIME_FORMAT)\n",
    "    return [\n",
    "        parse_country(json.loads(item), updated_time)\n",
    "        for item in COUNTRY_JSON_RE.findall(tag.string)\n",
    "    ]\n",
    "\n",
    "\n",
    "def Outside(ds):\n",
    "    \"\"\"Scrape the current statistics and store them in MySQL.\n",
    "\n",
    "    ds is indexed as (host, user, password, database name).\n",
    "    \"\"\"\n",
    "    db = MyDB(host=ds[0], user=ds[1], passwd=ds[2], db=ds[3])\n",
    "    try:\n",
    "        db.save_country_datas(fetch_country_stats())\n",
    "    finally:\n",
    "        db.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Credentials are never stored in the notebook: they come from the environment,\n",
    "# and the password is prompted for when COVID_DB_PASSWORD is not set.\n",
    "ds = [\n",
    "    os.environ.get('COVID_DB_HOST', 'localhost'),\n",
    "    os.environ.get('COVID_DB_USER', 'root'),\n",
    "    os.environ.get('COVID_DB_PASSWORD') or getpass.getpass('MySQL password: '),\n",
    "    os.environ.get('COVID_DB_NAME', 'covid19'),\n",
    "]\n",
    "Outside(ds)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}