疫情数据采集存取

master
邓俊华 5 years ago
parent e7e74ee92c
commit 8afc1fa9fb

@ -0,0 +1,74 @@
import requests
from bs4 import BeautifulSoup
import re
import json
import pymysql
import time
# Scrape the DXY COVID-19 page, parse the per-province and per-city statistics
# embedded in its <script> tags, and walk them for storage in MySQL.

# Send the GET request. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() prevents parsing an HTTP error
# page as if it were data.
response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', timeout=30)
response.raise_for_status()
# Decode the raw body explicitly as UTF-8.
html = response.content.decode('utf-8')
# Build the soup object.
soup = BeautifulSoup(html, 'html.parser')

# Locate the <script id="getAreaStat"> tag and turn it into a string so it can
# be searched with a regular expression.
tag = soup.find('script', attrs={'id': "getAreaStat"})
tagstr = str(tag)
# Each match is the JSON text of one province object (from "provinceName"
# through the end of its "cities" array); results is a list of strings.
results = re.findall(r'\{"provinceName".*?"cities".*?\]\}', tagstr)

# Extract the data update time from the first record found in the
# <script id="getListByCountryTypeService2true"> tag.
ttag = soup.find('script', attrs={'id': "getListByCountryTypeService2true"})
ttagstr = str(ttag)
tresults = re.findall(r'\{"id".*?"showRank":true\}', ttagstr)
if not tresults:
    # Same exception type as the bare tresults[0] lookup would raise, but with
    # a message that points at the actual cause.
    raise IndexError(
        'no record matched in script tag "getListByCountryTypeService2true"; '
        'the page layout may have changed'
    )
tstr = tresults[0]
jststr = json.loads(tstr)
# modifyTime is divided by 1000 to obtain epoch seconds for time.localtime().
ts = float(jststr['modifyTime']) / 1000
# The timestamp is rendered in the local timezone of the machine running this.
localt = time.localtime(ts)
pub_date = time.strftime("%Y-%m-%d %H:%M:%S", localt)

# Open the database connection. Arguments are passed by keyword because recent
# PyMySQL releases no longer accept them positionally.
# NOTE(review): credentials are hard-coded here; consider moving them to
# configuration or environment variables.
# NOTE(review): before re-enabling the inserts below, confirm the connection
# charset can store Chinese text (e.g. charset='utf8mb4').
db = pymysql.connect(
    host='localhost',
    user='root',
    password='MySQL888766',
    database='test',
)
try:
    # Create the cursor object.
    cursor = db.cursor()
    # Parameterized SQL statements for the province and city tables.
    Psql = 'insert into province_daily_datas(provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'
    Csql = 'insert into city_daily_datas(cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'

    # Parse each province record and the cities nested inside it.
    for count, item in enumerate(results, start=1):
        print("爬取中+%d" % count)
        item = json.loads(item)
        provinceName = item['provinceName']
        provinceShortName = item['provinceShortName']
        currentConfirmedCount = int(item['currentConfirmedCount'])
        confirmedCount = int(item['confirmedCount'])
        suspectedCount = int(item['suspectedCount'])
        curedCount = int(item['curedCount'])
        deadCount = int(item['deadCount'])
        cities = item['cities']
        # The province insert is currently disabled; uncomment both lines to
        # persist the row.
        # cursor.execute(Psql, (provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date))
        # db.commit()

        # Every city row is tagged with the short name of its province.
        province = provinceShortName
        for city in cities:
            cityName = city['cityName']
            currentConfirmedCount = int(city['currentConfirmedCount'])
            confirmedCount = int(city['confirmedCount'])
            suspectedCount = int(city['suspectedCount'])
            curedCount = int(city['curedCount'])
            deadCount = int(city['deadCount'])
            # The city insert is currently disabled; uncomment both lines to
            # persist the row.
            # cursor.execute(Csql, (cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date))
            # db.commit()
finally:
    # Always release the connection, even if parsing fails part-way through.
    db.close()
print("爬取成功")
Loading…
Cancel
Save