ppre8onyw/foreign_datas.py

import pymysql
import requests
from bs4 import BeautifulSoup
import json
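
# Scrapes Baidu's "new pneumonia" page (voice.baidu.com), extracts the daily
# figures for countries outside China from the embedded captain-config JSON,
# and stores one row per country in the MySQL table foreign_daily_datas.
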
class ForeignCountry:
    """Holds the daily epidemic figures reported for one foreign country."""

    def __init__(self):
        self.died = 0
        self.confirmed = 0
        self.crued = 0
        self.area = ''
        self.curConfirm = 0
        self.confirmedRelative = 0
        self.pub_date = ''

    def __str__(self):
        return ('area: %s, died: %s, confirmed: %s, crued: %s, curConfirm: %s, '
                'confirmedRelative: %s, pub_date: %s' % (
                    self.area, self.died, self.confirmed, self.crued,
                    self.curConfirm, self.confirmedRelative, self.pub_date))

    def get_fcountry_info_tuple(self):
        # Field order matches the INSERT statement in MyDB.save_outside_daily_datas.
        return (self.died, self.confirmed, self.crued, self.area,
                self.curConfirm, self.confirmedRelative, self.pub_date)
class MyDB:
    def __init__(self, host, user, passwd, db):
        # Keyword arguments work across PyMySQL versions (positional
        # connect() arguments are deprecated in recent releases).
        self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
        self.cursor = self.conn.cursor()

    # Build the parameter list (one tuple per country) for executemany().
    def get_fcountry_args_list(self, all_foreign_countries):
        all_args_list = []
        for country in all_foreign_countries:
            info = country.get_fcountry_info_tuple()
            all_args_list.append(info)
        return all_args_list

    # Save the daily epidemic data for foreign countries.
    def save_outside_daily_datas(self, all_foreign_countries):
        curdate = all_foreign_countries[0].pub_date
        # First delete any rows already stored for the current day.
        sql = 'delete from foreign_daily_datas where pub_date like %s'
        try:
            self.cursor.execute(sql, (curdate[:10] + '%',))
            self.conn.commit()
        except Exception as e:
            print(e)
        sql = ('insert into foreign_daily_datas(died, confirmed, crued, area, '
               'curConfirm, confirmedRelative, pub_date) '
               'values(%s, %s, %s, %s, %s, %s, %s)')
        res = self.get_fcountry_args_list(all_foreign_countries)
        print('+++ foreign_daily_datas, data len: %d' % len(res))
        try:
            self.cursor.executemany(sql, res)
            self.conn.commit()
        except Exception as e:
            print(e)
        print('+++ foreign_daily_datas is over.')

    def __del__(self):
        # Close the connection when the object is garbage collected.
        if getattr(self, 'conn', None) is not None:
            self.conn.close()
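
# The foreign_daily_datas table is assumed to exist already; the script never
# creates it. A sketch of a compatible schema, inferred from the INSERT above
# (column names come from the code, the types are guesses, not the original
# definition):
#
#   CREATE TABLE IF NOT EXISTS foreign_daily_datas (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       died VARCHAR(16),
#       confirmed VARCHAR(16),
#       crued VARCHAR(16),
#       area VARCHAR(64),
#       curConfirm VARCHAR(16),
#       confirmedRelative VARCHAR(16),
#       pub_date VARCHAR(32)
#   );
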
class DataService:
    # Fetches the page, parses it, and saves the extracted data.
    def __init__(self):
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        self.db = MyDB('localhost', 'root', '1999z5g24x', 'text_data_increasing')

    # Fetch the web page.
    def fetch_html_page(self):
        res = requests.get(self.url)
        res = res.content.decode('utf-8')
        return res

    # Parse the web page.
    def parse_target_page(self, html):
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        tagStr = tag.string
        tagDict = json.loads(tagStr)
        # Extract the time the data was last updated.
        self.pub_date = tagDict['component'][0]['mapLastUpdatedTime']
        # Extract the per-country data for foreign countries.
        self.outsideDatas = tagDict['component'][0]['caseOutsideList']

    # Normalize a string: None or empty becomes '0'.
    def process_str(self, s):
        ret = ''
        if s is None or s == '':
            ret = '0'
        else:
            ret = s
        return ret

    # Return target[key] if present, otherwise an empty string.
    def getOrElse(self, target, key):
        ret = ''
        if target.get(key) is not None:
            ret = target[key]
        else:
            ret = ''
        return ret
    # Parse the data for foreign countries.
    def parse_outside_daily_datas(self):
        # self.outsideDatas is a list with one element per country.
        all_foreign_countries = []
        for item in self.outsideDatas:
            country = ForeignCountry()
            country.died = self.getOrElse(item, 'died')
            country.confirmed = self.getOrElse(item, 'confirmed')
            country.crued = self.getOrElse(item, 'crued')
            country.area = self.getOrElse(item, 'area')
            country.curConfirm = self.getOrElse(item, 'curConfirm')
            country.confirmedRelative = self.getOrElse(item, 'confirmedRelative')
            country.pub_date = self.pub_date
            all_foreign_countries.append(country)
        return all_foreign_countries

    # Turn the parsed page content into ForeignCountry objects.
    def fetch_page_datas(self):
        all_countries = self.parse_outside_daily_datas()
        # for item in all_countries:
        #     print(item)
        return all_countries
    # Main business function: fetch, parse, and save.
    def process_data(self):
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_countries = self.fetch_page_datas()
        # Save the data.
        self.db.save_outside_daily_datas(all_countries)


if __name__ == '__main__':
    # Create a DataService object and run the pipeline.
    ds = DataService()
    ds.process_data()