"""Scrape COVID-19 statistics from the DXY page and store them in MySQL.

The page embeds its data as JSON inside two ``<script>`` tags:

* ``getAreaStat`` - one JSON object per province, each carrying a
  ``cities`` list with the per-city figures;
* ``getListByCountryTypeService2true`` - country records whose
  ``modifyTime`` (milliseconds since the epoch) is used as the publication
  time of the whole snapshot.

Rows go to the ``province_daily_datas`` and ``city_daily_datas`` tables.
"""
import json
import re
import time

import pymysql
import requests
from bs4 import BeautifulSoup

SOURCE_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration or environment variables.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': 'MySQL888766',
    'database': 'test',
}

# Inserts are disabled by default, so a run only fetches and parses the page
# (dry run). Set to True to actually write rows to the database.
SAVE_TO_DB = False

PROVINCE_SQL = (
    'insert into province_daily_datas('
    'provinceName,provinceShortName,currentConfirmedCount,confirmedCount,'
    'suspectedCount,curedCount,deadCount,pub_date) '
    'values(%s, %s, %s, %s, %s, %s, %s, %s)'
)
CITY_SQL = (
    'insert into city_daily_datas('
    'cityName,currentConfirmedCount,confirmedCount,'
    'suspectedCount,curedCount,deadCount,province,pub_date) '
    'values(%s, %s, %s, %s, %s, %s, %s, %s)'
)

# Counters shared by province and city records, in SQL column order.
COUNT_FIELDS = (
    'currentConfirmedCount',
    'confirmedCount',
    'suspectedCount',
    'curedCount',
    'deadCount',
)

# Non-greedy patterns that cut single JSON objects out of the script text.
PROVINCE_PATTERN = re.compile(r'\{"provinceName".*?"cities".*?\]\}')
COUNTRY_PATTERN = re.compile(r'\{"id".*?"showRank":true\}')


def fetch_page(url=SOURCE_URL, timeout=REQUEST_TIMEOUT) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than try to parse an error page further down.
    response.raise_for_status()
    return response.content.decode('utf-8')


def _script_text(soup, script_id) -> str:
    """Return the ``<script id=script_id>`` tag rendered as a string.

    A missing tag yields the string ``'None'``, which simply matches nothing
    in the callers' regular expressions.
    """
    return str(soup.find('script', attrs={'id': script_id}))


def parse_provinces(soup) -> list:
    """Return the province records (decoded JSON dicts) found in the page."""
    text = _script_text(soup, 'getAreaStat')
    return [json.loads(raw) for raw in PROVINCE_PATTERN.findall(text)]


def parse_pub_date(soup) -> str:
    """Return the data update time as ``YYYY-mm-dd HH:MM:SS`` in local time.

    Raises:
        RuntimeError: if no country record can be located in the page.
    """
    text = _script_text(soup, 'getListByCountryTypeService2true')
    match = COUNTRY_PATTERN.search(text)
    if match is None:
        raise RuntimeError(
            'no country record found in script '
            '"getListByCountryTypeService2true"; cannot determine the '
            'data update time'
        )
    record = json.loads(match.group(0))
    # modifyTime is in milliseconds; time.localtime expects seconds.
    seconds = float(record['modifyTime']) / 1000
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))


def _counts(record) -> tuple:
    """Return the five counters of *record* as ints, in COUNT_FIELDS order."""
    return tuple(int(record[field]) for field in COUNT_FIELDS)


def store_records(cursor, provinces, pub_date) -> None:
    """Build one row per province and per city; insert them if enabled.

    Progress is printed once per province. Nothing is executed on *cursor*
    unless ``SAVE_TO_DB`` is true. Committing is left to the caller.
    """
    for index, province in enumerate(provinces, start=1):
        print("爬取中+%d" % index)
        province_name = province['provinceName']
        short_name = province['provinceShortName']
        province_row = (province_name, short_name) + _counts(province) + (pub_date,)
        # City rows reference their province by its short name.
        city_rows = [
            (city['cityName'],) + _counts(city) + (short_name, pub_date)
            for city in province['cities']
        ]

        if not SAVE_TO_DB:
            continue
        cursor.execute(PROVINCE_SQL, province_row)
        if city_rows:
            cursor.executemany(CITY_SQL, city_rows)


def main() -> None:
    """Fetch the page, parse it and (optionally) persist the snapshot."""
    soup = BeautifulSoup(fetch_page(), 'html.parser')
    provinces = parse_provinces(soup)
    pub_date = parse_pub_date(soup)

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(**DB_CONFIG)
    try:
        with db.cursor() as cursor:
            store_records(cursor, provinces, pub_date)
        if SAVE_TO_DB:
            # Single commit at the end, so a run that fails midway commits
            # nothing (given a transactional storage engine).
            db.commit()
    finally:
        # Always release the connection, even when parsing or an insert fails.
        db.close()
    print("爬取成功")


if __name__ == "__main__":
    main()