# ppre8onyw/foreign_datas.py
import json

import pymysql
import requests
from bs4 import BeautifulSoup
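# Third-party dependencies, assuming a bare environment:
#   pip install PyMySQL requests beautifulsoup4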
class ForeignCountry:
    # Holds one country's daily epidemic figures.
    def __init__(self):
        self.died = 0
        self.confirmed = 0
        self.crued = 0              # cured/recovered (field name as used by the source)
        self.area = ''
        self.curConfirm = 0         # currently confirmed
        self.confirmedRelative = 0  # newly confirmed
        self.pub_date = ''
    def __str__(self):
        return ('area: %s, died: %s, confirmed: %s, crued: %s, '
                'curConfirm: %s, confirmedRelative: %s, pub_date: %s'
                % (self.area, self.died, self.confirmed, self.crued,
                   self.curConfirm, self.confirmedRelative, self.pub_date))
    def get_fcountry_info_tuple(self):
        # The order matches the column list of the INSERT in MyDB.
        return (self.died, self.confirmed, self.crued, self.area,
                self.curConfirm, self.confirmedRelative, self.pub_date)
class MyDB:
    def __init__(self, host, user, passwd, db):
        # Recent pymysql versions require keyword arguments for connect().
        self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
        self.cursor = self.conn.cursor()
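    # A minimal sketch of the target table, assuming it has to be created
    # first (the DDL is not part of this script; the column names come from
    # the INSERT below, the types are guesses):
    #   CREATE TABLE foreign_daily_datas (
    #       died INT, confirmed INT, crued INT, area VARCHAR(64),
    #       curConfirm INT, confirmedRelative INT, pub_date VARCHAR(32)
    #   );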
    # Format the foreign-country objects as an argument list for executemany().
    def get_fcountry_args_list(self, all_foreign_countries):
        all_args_list = []
        for country in all_foreign_countries:
            info = country.get_fcountry_info_tuple()
            all_args_list.append(info)
        return all_args_list
    # Save the daily epidemic data for foreign countries.
    def save_outside_daily_datas(self, all_foreign_countries):
        curdate = all_foreign_countries[0].pub_date
        # First delete any rows already stored for the current day;
        # let the driver quote the LIKE pattern instead of interpolating it.
        sql = 'delete from foreign_daily_datas where pub_date like %s'
        try:
            self.cursor.execute(sql, (curdate[:10] + '%',))
            self.conn.commit()
        except Exception as e:
            print(e)
        sql = ('insert into foreign_daily_datas'
               '(died, confirmed, crued, area, curConfirm, confirmedRelative, pub_date) '
               'values(%s, %s, %s, %s, %s, %s, %s)')
        res = self.get_fcountry_args_list(all_foreign_countries)
        print('+++ foreign_daily_datas, data len: %d' % len(res))
        try:
            self.cursor.executemany(sql, res)
            self.conn.commit()
        except Exception as e:
            print(e)
        print('+++ foreign_daily_datas is over.')
    def __del__(self):
        if self.conn is not None:
            self.conn.close()
class DataService:
    # Fetches the Baidu epidemic page, parses it, and saves the data.
    def __init__(self):
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        self.db = MyDB('localhost', 'root', '1999z5g24x', 'text_data_increasing')
    # Fetch the page HTML.
    def fetch_html_page(self):
        res = requests.get(self.url, timeout=10)
        return res.content.decode('utf-8')
    # Parse the page: the data is a JSON blob embedded in a <script> tag.
    def parse_target_page(self, html):
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        tagStr = tag.string
        tagDict = json.loads(tagStr)
        # Extract the data-update timestamp.
        self.pub_date = tagDict['component'][0]['mapLastUpdatedTime']
        # Extract the per-country data for countries outside China.
        self.outsideDatas = tagDict['component'][0]['caseOutsideList']
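    # For reference, each element of caseOutsideList is expected to carry
    # the keys read by parse_outside_daily_datas() below; a hypothetical
    # entry (the values are made up) would look like:
    #   {"area": "Italy", "died": "34345", "confirmed": "238159",
    #    "crued": "180544", "curConfirm": "23270", "confirmedRelative": "280"}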
    # Normalize a string field: map None or '' to '0' (currently unused).
    def process_str(self, s):
        if s is None or s == '':
            return '0'
        return s
    # Return target[key] if it is present and not None, else ''.
    def getOrElse(self, target, key):
        if target.get(key) is not None:
            return target[key]
        return ''
    # Parse the foreign data: self.outsideDatas is a list with one
    # element per country.
    def parse_outside_daily_datas(self):
        all_foreign_countries = []
        for item in self.outsideDatas:
            country = ForeignCountry()
            country.died = self.getOrElse(item, 'died')
            country.confirmed = self.getOrElse(item, 'confirmed')
            country.crued = self.getOrElse(item, 'crued')
            country.area = self.getOrElse(item, 'area')
            country.curConfirm = self.getOrElse(item, 'curConfirm')
            country.confirmedRelative = self.getOrElse(item, 'confirmedRelative')
            country.pub_date = self.pub_date
            all_foreign_countries.append(country)
        return all_foreign_countries
    # Turn the parsed page content into country objects.
    def fetch_page_datas(self):
        all_countries = self.parse_outside_daily_datas()
        # for item in all_countries:
        #     print(item)
        return all_countries
    # Main workflow: fetch the page, parse it, and save the results.
    def process_data(self):
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_countries = self.fetch_page_datas()
        # Save the data.
        self.db.save_outside_daily_datas(all_countries)
if __name__ == '__main__':
    # Create a DataService object and run the fetch-parse-save pipeline.
    ds = DataService()
    ds.process_data()