You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
165 lines
6.6 KiB
165 lines
6.6 KiB
4 years ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"executemany() missing 1 required positional argument: 'args'\n",
|
||
|
"+++ save_country_datas, data len: 214\n",
|
||
|
"+++ save_country_datas is over.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import datetime\n",
|
||
|
"import requests\n",
|
||
|
"from bs4 import BeautifulSoup\n",
|
||
|
"import re\n",
|
||
|
"import json\n",
|
||
|
"import pymysql\n",
|
||
|
"\n",
|
||
|
"# 定义实体类\n",
|
||
|
"def Outside(ds):\n",
|
||
|
" class Country:\n",
|
||
|
" def __init__(self):\n",
|
||
|
" self.countryName = ''\n",
|
||
|
" self.currentConfirmedCount = 0 # 现有确诊病例数\n",
|
||
|
" self.confirmedCount = 0 # 累计确诊\n",
|
||
|
" self.confirmedCountRank = 0 # 累计确诊排名\n",
|
||
|
" self.curedCount = 0 # 累计治愈\n",
|
||
|
" self.deadCount = 0 # 累计死亡\n",
|
||
|
" self.deadCountRank = 0 # 累计死亡排名\n",
|
||
|
" self.deadRate = 0.0 # 死亡率\n",
|
||
|
" self.deadRateRank = 0 # 死亡率排名\n",
|
||
|
" self.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n",
|
||
|
" def get_info_tuple(self):\n",
|
||
|
" return (self.countryName, self.currentConfirmedCount, self.confirmedCount, self.confirmedCountRank, self.curedCount, self.deadCount, self.deadCountRank, self.deadRate, self.deadRateRank,self.updatedTime)\n",
|
||
|
" def __str__(self):\n",
|
||
|
" return 'countryName:%s,currentConfirmedCount:%d,confirmedCount:%d,\\\n",
|
||
|
" confirmedCountRank:%d,curedCount:%d,deadCount:%d,deadCountRank:%d,deadRate:%f,deadRateRank:%d,updatedTime:%s' % (self.countryName, self.currentConfirmedCount, self.confirmedCount, self.confirmedCountRank, self.curedCount, self.deadCount, self.deadCountRank, self.deadRate, self.deadRateRank,self.updatedTime)\n",
|
||
|
"\n",
|
||
|
" class MyDB:\n",
|
||
|
" def __init__(self, host, user, passwd, db):\n",
|
||
|
"\n",
|
||
|
" self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)\n",
|
||
|
" self.cursor = self.conn.cursor()\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" def get_country_list_tuple(self, all_country):\n",
|
||
|
" info_tuple = []\n",
|
||
|
" for item in all_country:\n",
|
||
|
" info_tuple.append(item.get_info_tuple())\n",
|
||
|
" return info_tuple\n",
|
||
|
"\n",
|
||
|
" # 保存数据\n",
|
||
|
" def save_country_datas(self, all_country):\n",
|
||
|
" date=datetime.datetime.now().strftime('%Y-%m-%d')\n",
|
||
|
" sql='delete from country_daily_datas where updatedTime like \"%s\"'%(date+\"%\")\n",
|
||
|
" try:\n",
|
||
|
" self.cursor.execute(sql)\n",
|
||
|
" self.conn.commit()\n",
|
||
|
" except Exception as e:\n",
|
||
|
" print(e)\n",
|
||
|
"\n",
|
||
|
" sql = 'insert into country_daily_datas(countryName,currentConfirmedCount,\\\n",
|
||
|
" confirmedCount,confirmedCountRank,curedCount,deadCount,deadCountRank,deadRate,deadRateRank,pub_time) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\n",
|
||
|
" res = self.get_country_list_tuple(all_country)\n",
|
||
|
"\n",
|
||
|
" print('+++ save_country_datas, data len: %d' % len(res))\n",
|
||
|
" try:\n",
|
||
|
" self.cursor.executemany(sql, res)\n",
|
||
|
" self.conn.commit()\n",
|
||
|
" except Exception as e:\n",
|
||
|
" print(e)\n",
|
||
|
" print('+++ save_country_datas is over.')\n",
|
||
|
" def show_country_datas(self):\n",
|
||
|
" self.cursor.execute('select * from country_daily_datas')\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" def __del__(self):\n",
|
||
|
" if self.conn is not None:\n",
|
||
|
" self.conn.close()\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" def forign_data_search(ds):\n",
|
||
|
" db = MyDB(host = ds[0],user = ds[1],passwd = ds[2],db = ds[3])\n",
|
||
|
" res = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia')# 爬取页面\n",
|
||
|
" res = res.content.decode('utf-8') # 重新解码\n",
|
||
|
" soup = BeautifulSoup(res, 'html.parser')# 构建soup对象\n",
|
||
|
" tag = soup.find('script', attrs={'id':'getListByCountryTypeService2true'}) # Tag# 使用soup对象查找实时播报新闻标签\n",
|
||
|
" tagStr = tag.string# 获取内容\n",
|
||
|
" results = re.findall('\\{\"id\".*?\"showRank\".*?\\}', tagStr) # length: 34, [str, str, ....]# 使用正则表达式匹配\n",
|
||
|
" all_country = []\n",
|
||
|
" for item in results:\n",
|
||
|
" country=Country() \n",
|
||
|
" obj = json.loads(item) # obj -> dict\n",
|
||
|
" country.countryName = obj['provinceName']\n",
|
||
|
" country.currentConfirmedCount = int(obj['currentConfirmedCount'])\n",
|
||
|
" country.confirmedCount = int(obj['confirmedCount'])\n",
|
||
|
" country.curedCount = int(obj['curedCount'])\n",
|
||
|
" country.deadCount = int(obj['deadCount'])\n",
|
||
|
" country.deadRate = float(obj['deadRate'])\n",
|
||
|
" country.updatedTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')\n",
|
||
|
" try:\n",
|
||
|
" country.deadCountRank = int(obj['deadCountRank'])\n",
|
||
|
" country.deadRateRank = int(obj['deadRateRank'])\n",
|
||
|
" country.confirmedCountRank = int(obj['confirmedCountRank'])\n",
|
||
|
" except KeyError:\n",
|
||
|
" country.deadCountRank = 0\n",
|
||
|
" country.deadRateRank = 0\n",
|
||
|
" country.confirmedCountRank = 0\n",
|
||
|
" finally:\n",
|
||
|
" all_country.append(country)\n",
|
||
|
"\n",
|
||
|
" db.save_country_datas(all_country)\n",
|
||
|
" \n",
|
||
|
" forign_data_search(ds)\n",
|
||
|
"\n",
|
||
|
"ds=['localhost','root','20Z00t10x28_my','covid19']\n",
|
||
|
"Outside(ds)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 4
|
||
|
}
|