|
|
import requests
|
|
|
from bs4 import BeautifulSoup
|
|
|
import re
|
|
|
import json
|
|
|
import pymysql
|
|
|
import time
|
|
|
|
|
|
|
|
|
# Send a GET request to the DXY COVID-19 overview page.
# A timeout is set so a stalled connection cannot hang the script forever,
# and HTTP error statuses are raised immediately instead of being parsed
# as though they were the real page.
response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', timeout=30)
response.raise_for_status()

# Decode the raw response bytes explicitly as UTF-8.
html = response.content.decode('utf-8')

# Build the soup object using the 'html.parser' backend.
soup = BeautifulSoup(html, 'html.parser')

# Look up the <script id="getAreaStat"> element.
tag = soup.find('script', attrs={'id': "getAreaStat"})

# Convert the tag to a string so it can be searched with a regex.
# If the tag was not found, str(None) == 'None' and the regex below simply
# yields an empty list.
tagstr = str(tag)

# Collect every JSON object that starts with "provinceName" and ends with
# its "cities" array; results is a list of JSON strings.
results = re.findall(r'\{"provinceName".*?"cities".*?\]\}', tagstr)
|
|
|
|
|
|
|
|
|
# Extract the time at which the data was last updated.
ttag = soup.find('script', attrs={'id': "getListByCountryTypeService2true"})

# str() turns a missing tag (None) into 'None', which the regex will not match.
ttagstr = str(ttag)

# Find the JSON objects that start with "id" and end with "showRank":true.
tresults = re.findall(r'\{"id".*?"showRank":true\}', ttagstr)

# Fail with a descriptive message instead of a bare "list index out of range"
# when the page layout changed or the request returned an unexpected page.
# IndexError is kept because that is what the unguarded lookup raised.
if not tresults:
    raise IndexError(
        'no record matching {"id"..."showRank":true} found in script tag '
        '"getListByCountryTypeService2true"; cannot determine update time'
    )

# Only the first matching record is used as the source of the timestamp.
tstr = tresults[0]
jststr = json.loads(tstr)

# modifyTime is divided by 1000 before being passed to time.localtime
# (presumably it is an epoch timestamp in milliseconds -- TODO confirm).
ts = jststr['modifyTime']
ts = float(ts) / 1000

# Format the timestamp in the machine's local time zone.
localt = time.localtime(ts)
pub_date = time.strftime("%Y-%m-%d %H:%M:%S", localt)
|
|
|
|
|
|
|
|
|
# Open the database connection.
# Keyword arguments are used because PyMySQL 1.0+ made connect() parameters
# keyword-only; the positional form raises TypeError there.  These keywords
# are also accepted by older PyMySQL releases.
# NOTE(review): the credentials are hard-coded in the source; consider
# loading them from the environment or a config file instead.
db = pymysql.connect(
    host='localhost',
    user='root',
    password='MySQL888766',
    database='test',
)

# Create the cursor object.
cursor = db.cursor()

# SQL statements: parameterized INSERT templates (one %s placeholder per
# column) for the province-level and the city-level tables.
Psql = 'insert into province_daily_datas(provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'

Csql = 'insert into city_daily_datas(cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'
|
|
|
|
|
|
# Parse the scraped content: extract the figures for every province and for
# each of that province's cities.
# The connection is closed in a `finally` clause so it is released even when
# a record fails to parse (bad JSON, missing key, non-numeric count).
try:
    # enumerate(start=1) provides the 1-based progress counter.
    for count, raw_item in enumerate(results, start=1):
        print("爬取中+%d" % count)

        # Each entry of results is a JSON string describing one province.
        item = json.loads(raw_item)

        provinceName = item['provinceName']
        provinceShortName = item['provinceShortName']
        currentConfirmedCount = int(item['currentConfirmedCount'])
        confirmedCount = int(item['confirmedCount'])
        suspectedCount = int(item['suspectedCount'])
        curedCount = int(item['curedCount'])
        deadCount = int(item['deadCount'])
        cities = item['cities']

        # NOTE: the province-level insert is currently disabled.
        #cursor.execute(Psql, (provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date))
        #db.commit()

        for city in cities:
            # The count variables are deliberately rebound to the city-level
            # values; the province-level values were consumed above.
            cityName = city['cityName']
            currentConfirmedCount = int(city['currentConfirmedCount'])
            confirmedCount = int(city['confirmedCount'])
            suspectedCount = int(city['suspectedCount'])
            curedCount = int(city['curedCount'])
            deadCount = int(city['deadCount'])
            province = item['provinceShortName']

            # NOTE: the city-level insert is currently disabled.
            #cursor.execute(Csql, (cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date))
            #db.commit()
finally:
    db.close()

# Reached only when every record was processed without an exception.
print("爬取成功")