You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
pjfuiaotp/数据采集存取.txt

74 lines
2.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
from bs4 import BeautifulSoup
import re
import json
import pymysql
import time
# Send a GET request to the target page. The timeout keeps the script from
# hanging indefinitely on a stalled connection, and raise_for_status() stops
# early on an HTTP error instead of silently parsing an error page.
response = requests.get('https://ncov.dxy.cn/ncovh5/view/pneumonia', timeout=30)
response.raise_for_status()
# Decode the raw response bytes as UTF-8.
html = response.content.decode('utf-8')
# Build the soup object.
soup = BeautifulSoup(html, 'html.parser')
# Locate the <script id="getAreaStat"> tag.
tag = soup.find('script', attrs={'id': "getAreaStat"})
# Convert the tag to a string so it can be searched with a regular expression.
tagstr = str(tag)
# Collect every substring that starts at {"provinceName" and runs through the
# "cities" array's closing ]}; results is a list of strings.
results = re.findall(r'\{"provinceName".*?"cities".*?\]\}', tagstr)
# Extract the time at which the data was last updated.
ttag = soup.find('script', attrs={'id': "getListByCountryTypeService2true"})
tresults = re.findall(r'\{"id".*?"showRank":true\}', str(ttag))
# Only the first matched record is consulted for its modifyTime field.
jststr = json.loads(tresults[0])
# modifyTime is divided by 1000 before being handed to time.localtime —
# presumably it is an epoch timestamp in milliseconds (TODO confirm).
ts = float(jststr['modifyTime']) / 1000
# Render the timestamp in the machine's local time zone.
pub_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(ts))
# Open the database connection. Keyword arguments are used because
# PyMySQL 1.0+ no longer accepts positional arguments to connect().
# NOTE(review): the credentials are hard-coded here; consider loading them
# from configuration or the environment instead.
db = pymysql.connect(host='localhost', user='root', password='MySQL888766', database='test')
# Create the cursor object.
cursor = db.cursor()
# Parameterised INSERT statements for the province-level and city-level tables.
Psql = 'insert into province_daily_datas(provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'
Csql = 'insert into city_daily_datas(cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date) values(%s, %s, %s, %s, %s, %s, %s, %s)'
# Running progress counter printed while the province records are processed.
count = 1
# Parse each matched record: pull out the province-level figures and the
# figures of every city listed under that province.
# The try/finally guarantees the database connection is closed even when a
# record fails to parse (json.loads, a missing key, or int() can raise).
try:
    for item in results:
        # Progress output; count is the running counter set up before this loop.
        print("爬取中+%d" % count)
        count += 1
        item = json.loads(item)
        provinceName = item['provinceName']
        provinceShortName = item['provinceShortName']
        currentConfirmedCount = int(item['currentConfirmedCount'])
        confirmedCount = int(item['confirmedCount'])
        suspectedCount = int(item['suspectedCount'])
        curedCount = int(item['curedCount'])
        deadCount = int(item['deadCount'])
        cities = item['cities']
        # NOTE(review): the province-level INSERT below is currently disabled,
        # so nothing is written to province_daily_datas.
        #cursor.execute(Psql, (provinceName,provinceShortName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,pub_date))
        #db.commit()
        for city in cities:
            cityName = city['cityName']
            # These assignments reuse the province-level variable names; the
            # province values are no longer needed once the city loop starts.
            currentConfirmedCount = int(city['currentConfirmedCount'])
            confirmedCount = int(city['confirmedCount'])
            suspectedCount = int(city['suspectedCount'])
            curedCount = int(city['curedCount'])
            deadCount = int(city['deadCount'])
            # Each city row is tagged with its province's short name.
            province = item['provinceShortName']
            # NOTE(review): the city-level INSERT below is currently disabled,
            # so nothing is written to city_daily_datas.
            #cursor.execute(Csql, (cityName,currentConfirmedCount,confirmedCount,suspectedCount,curedCount,deadCount,province,pub_date))
            #db.commit()
finally:
    db.close()
print("爬取成功")