|
|
@ -9,7 +9,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 15,
|
|
|
|
"execution_count": 30,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -26,15 +26,19 @@
|
|
|
|
" self.confirmedIncr = 0\n",
|
|
|
|
" self.confirmedIncr = 0\n",
|
|
|
|
" self.curedIncr = 0\n",
|
|
|
|
" self.curedIncr = 0\n",
|
|
|
|
" self.deadIncr = 0\n",
|
|
|
|
" self.deadIncr = 0\n",
|
|
|
|
|
|
|
|
" self.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
" \n",
|
|
|
|
" \n",
|
|
|
|
" def get_info_tuple(self):\n",
|
|
|
|
" def get_info_tuple(self):\n",
|
|
|
|
" return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount,\n",
|
|
|
|
" return (self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount,\n",
|
|
|
|
" self.deadCount, self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,\n",
|
|
|
|
" self.deadCount, self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr,\n",
|
|
|
|
" self.deadIncr)\n",
|
|
|
|
" self.deadIncr, self.updatedTime)\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
" def __str__(self):\n",
|
|
|
|
" def __str__(self):\n",
|
|
|
|
" return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s' % (\n",
|
|
|
|
" return 'currentConfirmedCount:%s, confirmedCount:%s, suspectedCount:%s, curedCount:%s, deadCount:%s, suspectedIncr:%s, currentConfirmedIncr:%s, confirmedIncr:%s, curedIncr:%s, deadIncr:%s, updatedTime:%s' % (\n",
|
|
|
|
" self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount, self.deadCount,\n",
|
|
|
|
" self.currentConfirmedCount, self.confirmedCount, self.suspectedCount, self.curedCount, self.deadCount,\n",
|
|
|
|
" self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr)"
|
|
|
|
" self.suspectedIncr, self.currentConfirmedIncr, self.confirmedIncr, self.curedIncr, self.deadIncr, self.updatedTime)"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -46,7 +50,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 16,
|
|
|
|
"execution_count": 31,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -56,17 +60,21 @@
|
|
|
|
" def __init__(self, host, user, passwd, db):\n",
|
|
|
|
" def __init__(self, host, user, passwd, db):\n",
|
|
|
|
" self.conn = pymysql.connect(host, user, passwd, db)\n",
|
|
|
|
" self.conn = pymysql.connect(host, user, passwd, db)\n",
|
|
|
|
" self.cursor = self.conn.cursor()\n",
|
|
|
|
" self.cursor = self.conn.cursor()\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
" def get_outsideSummary_list_tuple(self, outsideSummary):\n",
|
|
|
|
" def get_outsideSummary_list_tuple(self, outsideSummary):\n",
|
|
|
|
" info_tuple = []\n",
|
|
|
|
" info_tuple = []\n",
|
|
|
|
" info_tuple.append(outsideSummary.get_info_tuple())\n",
|
|
|
|
" info_tuple.append(outsideSummary.get_info_tuple())\n",
|
|
|
|
" return info_tuple\n",
|
|
|
|
" return info_tuple\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
" # 保存数据\n",
|
|
|
|
" # 保存数据\n",
|
|
|
|
" def save_outsideSummary_datas(self, outsideSummary):\n",
|
|
|
|
" def save_outsideSummary_datas(self, outsideSummary):\n",
|
|
|
|
" print('+++ [MyDB] delete from outsideSummary_realtime_datas') \n",
|
|
|
|
" print('+++ [MyDB] delete from outsideSummary_realtime_datas') \n",
|
|
|
|
" self.cursor.execute('delete from outsideSummary_realtime_datas') \n",
|
|
|
|
" self.cursor.execute('delete from outsideSummary_realtime_datas') \n",
|
|
|
|
" self.conn.commit()\n",
|
|
|
|
" self.conn.commit()\n",
|
|
|
|
" \n",
|
|
|
|
" \n",
|
|
|
|
" sql = 'insert into outsideSummary_realtime_datas(currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n",
|
|
|
|
" sql = 'insert into outsideSummary_realtime_datas(currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,suspectedIncr,currentConfirmedIncr,confirmedIncr,curedIncr,deadIncr,updatedTime) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n",
|
|
|
|
" res = self.get_outsideSummary_list_tuple(outsideSummary)\n",
|
|
|
|
" res = self.get_outsideSummary_list_tuple(outsideSummary)\n",
|
|
|
|
" print('+++ save_outsideSummary_datas, data len: %d' % len(res))\n",
|
|
|
|
" print('+++ save_outsideSummary_datas, data len: %d' % len(res))\n",
|
|
|
|
" try:\n",
|
|
|
|
" try:\n",
|
|
|
@ -75,6 +83,8 @@
|
|
|
|
" except Exception as e:\n",
|
|
|
|
" except Exception as e:\n",
|
|
|
|
" print(e)\n",
|
|
|
|
" print(e)\n",
|
|
|
|
" print('+++ save_outsideSummary_datas is over.')\n",
|
|
|
|
" print('+++ save_outsideSummary_datas is over.')\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
" def __del__(self):\n",
|
|
|
|
" def __del__(self):\n",
|
|
|
|
" if self.conn is not None:\n",
|
|
|
|
" if self.conn is not None:\n",
|
|
|
|
" self.conn.close()"
|
|
|
|
" self.conn.close()"
|
|
|
@ -89,8 +99,10 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 17,
|
|
|
|
"execution_count": 32,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {
|
|
|
|
|
|
|
|
"scrolled": true
|
|
|
|
|
|
|
|
},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
@ -104,10 +116,14 @@
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"#业务逻辑类\n",
|
|
|
|
"#业务逻辑类\n",
|
|
|
|
|
|
|
|
"import datetime\n",
|
|
|
|
"import requests\n",
|
|
|
|
"import requests\n",
|
|
|
|
"import re\n",
|
|
|
|
"import re\n",
|
|
|
|
"from bs4 import BeautifulSoup\n",
|
|
|
|
"from bs4 import BeautifulSoup\n",
|
|
|
|
"import json\n",
|
|
|
|
"import json\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"class DataService:\n",
|
|
|
|
"class DataService:\n",
|
|
|
|
" def __init__(self):\n",
|
|
|
|
" def __init__(self):\n",
|
|
|
|
" self.db = MyDB('localhost', 'root', 'liyujiao0352','covid19_datas_guangxi')\n",
|
|
|
|
" self.db = MyDB('localhost', 'root', 'liyujiao0352','covid19_datas_guangxi')\n",
|
|
|
@ -115,19 +131,27 @@
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"# 爬取页面\n",
|
|
|
|
"# 爬取页面\n",
|
|
|
|
"res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')\n",
|
|
|
|
"res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"# 重新解码\n",
|
|
|
|
"# 重新解码\n",
|
|
|
|
"res = res.content.decode('utf-8')\n",
|
|
|
|
"res = res.content.decode('utf-8')\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"# 构建soup对象\n",
|
|
|
|
"# 构建soup对象\n",
|
|
|
|
"soup = BeautifulSoup(res, 'html.parser')\n",
|
|
|
|
"soup = BeautifulSoup(res, 'html.parser')\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"# 使用soup对象查找国外疫情数据标签\n",
|
|
|
|
"# 使用soup对象查找国外疫情数据标签\n",
|
|
|
|
"tag = soup.find('script', attrs={'id': 'getStatisticsService'})\n",
|
|
|
|
"tag = soup.find('script', attrs={'id': 'getStatisticsService'})\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"# 转成字符串\n",
|
|
|
|
"# 转成字符串\n",
|
|
|
|
"tagstr = tag.string\n",
|
|
|
|
"tagstr = tag.string\n",
|
|
|
|
"# 使用正则表达式查找所有内容\n",
|
|
|
|
"# 使用正则表达式查找所有内容\n",
|
|
|
|
"result = re.findall('\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}', tagstr)\n",
|
|
|
|
"result = re.findall('\\{\"currentConfirmedCount\".*?\"deadIncr\".*?\\}', tagstr)\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"# 获取国外疫情数据\n",
|
|
|
|
"# 获取国外疫情数据\n",
|
|
|
|
"#print(result[0])\n",
|
|
|
|
"#print(result[0])\n",
|
|
|
|
"obj = json.loads(result[0])\n",
|
|
|
|
"obj = json.loads(result[0])\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
"#print(obj)\n",
|
|
|
|
"#print(obj)\n",
|
|
|
|
"def fetch_outside_summary(obj):\n",
|
|
|
|
"def fetch_outside_summary(obj):\n",
|
|
|
|
" outsideSummary = OutsideSummary()\n",
|
|
|
|
" outsideSummary = OutsideSummary()\n",
|
|
|
@ -141,8 +165,11 @@
|
|
|
|
" outsideSummary.confirmedIncr = int(obj['confirmedIncr'])\n",
|
|
|
|
" outsideSummary.confirmedIncr = int(obj['confirmedIncr'])\n",
|
|
|
|
" outsideSummary.curedIncr = int(obj['curedIncr'])\n",
|
|
|
|
" outsideSummary.curedIncr = int(obj['curedIncr'])\n",
|
|
|
|
" outsideSummary.deadIncr = int(obj['deadIncr'])\n",
|
|
|
|
" outsideSummary.deadIncr = int(obj['deadIncr'])\n",
|
|
|
|
|
|
|
|
" outsideSummary.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n",
|
|
|
|
" return outsideSummary\n",
|
|
|
|
" return outsideSummary\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" \n",
|
|
|
|
"# 创建Dataservice对象\n",
|
|
|
|
"# 创建Dataservice对象\n",
|
|
|
|
"ds = DataService()\n",
|
|
|
|
"ds = DataService()\n",
|
|
|
|
"outsideSummary=fetch_outside_summary(obj)\n",
|
|
|
|
"outsideSummary=fetch_outside_summary(obj)\n",
|
|
|
@ -156,6 +183,13 @@
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
"source": []
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": []
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"execution_count": null,
|
|
|
|