diff --git a/Main.txt b/Main.txt deleted file mode 100644 index 4b658f1..0000000 --- a/Main.txt +++ /dev/null @@ -1,79 +0,0 @@ -import pymysql -import requests -from bs4 import BeautifulSoup -import json -import re - -import pymysql -def getData(DB): - cs = DB.cursor() - cs.execute('select * from cn') - res = cs.fetchone() - - print(res) - -def delData(DB,day): - cs = DB.cursor() - tmp = day[0:11:1] +'%' - sql = 'delete from cn where date like %s' - data = [tmp] - try: - cs.executemany(sql,data) - DB.commit() - except Exception as e: - print('删除不成功') - print(e) - -def fun1(summary): - tmp = summary.__dict__ - - d = [] - - for item in tmp: - d.append(tmp[item]) - - return d - -def save_summary(host,user, password, database): - - #db = pymysql.connect('localhost', 'root', '123456', 'covid19') - db = pymysql.connect(host,user, password, database) - - cs = db.cursor() - - tmp,time = Mod_Summary_CN() - D = fun1(tmp) - - tmp1 = [] - - for i in range(0,18): - tmp1.append(D[i]) - - tmp1.append(time) - tmp2 = tuple(tmp1) - - - sql = 'insert into cn(confirmed, confirmed_R, curConfirm, curConfirm_R, died, died_R,cured, cured_R, asymptomatic, asymptomatic_R, unconfirmed, \ - unconfirmed_R, icu, icu_R, overseasIn, overseasIn_R, unOverseasInC, unOverseasInA, date)values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)' - - data = [] - data.append(tmp2) - - - try: - delData(db,time) - cs.executemany(sql,data) - except Exception as e: - print('添加不成功') - print(e) - else: - db.commit() - print('添加成功') - - - getData(db) - - cs.close() - db.close() - - diff --git a/Mod_Summary_CN().txt b/Mod_Summary_CN().txt deleted file mode 100644 index 682a5ae..0000000 --- a/Mod_Summary_CN().txt +++ /dev/null @@ -1,50 +0,0 @@ -import requests -from bs4 import BeautifulSoup -import json -import re - - -def Mod_Summary_CN(): - response = requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') - content = response.text - soup = BeautifulSoup(content, 
'html.parser') - - - tag1 = soup.find('script', attrs={'id':'captain-config'}) - tagstr = str(tag1) - res = re.findall(r'(\{".*?".*?\})',tagstr) - - tagJson = json.loads(res[831]) - - temp1 = re.findall(r'("mapLastUpdatedTime":".*?")',tagstr) - temp2 = str(temp1[0]) - time = temp2[22:len(temp2)-1:1] - - - summary = Summary() - summary.confirmed = int(tagJson['confirmed']) - summary.confirmedRelative = int(tagJson['confirmedRelative']) - summary.curConfirm = int(tagJson['curConfirm']) - summary.curConfirmRelative = int(tagJson['curConfirmRelative']) - summary.died = int(tagJson['died']) - summary.diedRelative = int(tagJson['diedRelative']) - summary.cured = int(tagJson['cured']) - summary.curedRelative = int(tagJson['curedRelative']) - summary.asymptomatic = int(tagJson['asymptomatic']) - summary.asymptomaticRelative = int(tagJson['asymptomaticRelative']) - summary.unconfirmed = int(tagJson['unconfirmed']) - summary.unconfirmedRelative = int(tagJson['unconfirmedRelative']) - summary.icu = int(tagJson['icu']) - summary.icuRelative = int(tagJson['icuRelative']) - summary.overseasInput = int(tagJson['overseasInput']) - summary.overseasInputRelative = int(tagJson['overseasInputRelative']) - summary.unOverseasInputCumulative = int(tagJson['unOverseasInputCumulative']) - summary.unOverseasInputNewAdd = int(tagJson['unOverseasInputNewAdd']) - summary.icuDisable = int(tagJson['icuDisable']) - - - #print('---------------------------------------------------------') - #print(' 更新时间:%s'%(time)) - #print('---------------------------------------------------------') - #print(summary) - diff --git a/class Summary.txt b/class Summary.txt deleted file mode 100644 index 88aef4a..0000000 --- a/class Summary.txt +++ /dev/null @@ -1,30 +0,0 @@ -class Summary: - import time - - def __init__(self): - self.confirmed = 0 - self.confirmedRelative = 0 - self.curConfirm = 0 - self.curConfirmRelative = 0 - self.died = 0 - self.diedRelative = 0 - self.cured = 0 - self.curedRelative = 0 - 
class DataService:
    """Fetch the Baidu epidemic page, parse it and persist per-province data.

    Typical use is ``DataService().process_data()``, which downloads the page,
    extracts the embedded JSON, builds ``Province``/``City`` entities and hands
    the provinces to the database layer.

    ``MyDB``, ``Province`` and ``City`` are not defined in this block; they
    must be in scope where this class is used.
    """

    # Integer attributes copied from each province record. The attribute name
    # equals the JSON key. The 'crued' spelling is the key read from the page
    # data and the attribute/column name used by the entities; do not "fix" it.
    _PROVINCE_INT_FIELDS = (
        'confirmed',
        'died',
        'crued',
        'relativeTime',
        'confirmedRelative',
        'diedRelative',
        'curedRelative',
        'asymptomaticRelative',
        'asymptomatic',
        'curConfirm',
        'curConfirmRelative',
        'icuDisable',
    )

    # Integer attributes copied from each city record.
    _CITY_INT_FIELDS = (
        'confirmed',
        'died',
        'crued',
        'confirmedRelative',
        'curConfirm',
    )

    def __init__(self, db=None):
        """Create the service.

        Args:
            db: Optional object exposing ``save_province_datas(provinces)``.
                When omitted, a ``MyDB`` connection with the original default
                settings is opened, exactly as before.
        """
        self.url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
        # Filled in by parse_target_page(); initialised here so the fetch_*
        # methods return an empty result instead of raising AttributeError
        # when called before a page has been parsed.
        self.pub_date = ''
        self.insideDatas = []
        if db is None:
            # NOTE(review): credentials are hard-coded; kept only as the
            # backward-compatible default. Prefer passing ``db`` explicitly.
            db = MyDB('localhost', 'root', 'lujian123', 'covid19_datas_guangxi')
        self.db = db

    def fetch_html_page(self):
        """Download the page at ``self.url`` and return it as UTF-8 text.

        Raises:
            requests.RequestException: on network errors, timeouts or a
                non-2xx HTTP status.
        """
        import requests

        # A timeout keeps a stalled server from hanging the job forever.
        res = requests.get(self.url, timeout=30)
        res.raise_for_status()
        return res.content.decode('utf-8')

    def parse_target_page(self, html):
        """Extract the embedded JSON config from *html*.

        Sets ``self.pub_date`` (the ``mapLastUpdatedTime`` value) and
        ``self.insideDatas`` (the ``caseList`` list of province dicts).

        Raises:
            ValueError: if the ``captain-config`` script tag is missing or
                empty.
        """
        import json

        from bs4 import BeautifulSoup

        # Name the parser explicitly: without it bs4 picks whichever parser is
        # installed (and warns), so results can differ between machines.
        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('script', attrs={'id': 'captain-config'})
        if tag is None or not tag.string:
            raise ValueError("script tag with id 'captain-config' not found in page")

        component = json.loads(tag.string)['component'][0]
        # Publication time of the data set.
        self.pub_date = component['mapLastUpdatedTime']
        # Per-province records; each one carries its cities under 'subList'.
        self.insideDatas = component['caseList']

    def process_str(self, s):
        """Return ``'0'`` when *s* is ``None`` or ``''``; otherwise return *s* unchanged."""
        if s is None or s == '':
            return '0'
        return s

    def _to_int(self, record, key):
        """Read ``record[key]`` as an int, treating a missing/empty value as 0."""
        return int(self.process_str(record.get(key)))

    def fetch_province_datas(self):
        """Build ``Province`` objects (with nested ``City`` objects) from the parsed data.

        Every numeric field goes through the same missing/empty-tolerant
        conversion, so one blank value no longer aborts the whole run.

        Returns:
            list: one ``Province`` per entry of ``self.insideDatas``.
        """
        all_provinces = []
        for item in self.insideDatas:
            province = Province()
            for field in self._PROVINCE_INT_FIELDS:
                setattr(province, field, self._to_int(item, field))
            province.area = item['area']
            province.pub_date = self.pub_date

            # City-level records of this province.
            for city_item in item.get('subList') or []:
                city = City()
                city.city = city_item['city']
                for field in self._CITY_INT_FIELDS:
                    setattr(city, field, self._to_int(city_item, field))
                city.pub_date = self.pub_date
                city.province = province.area
                province.subList.append(city)

            all_provinces.append(province)
        return all_provinces

    def fetch_page_datas(self):
        """Return all entities built from the parsed page (currently the provinces)."""
        return self.fetch_province_datas()

    def process_data(self):
        """Run the whole pipeline: download, parse, build entities, save."""
        html = self.fetch_html_page()
        self.parse_target_page(html)
        all_provinces = self.fetch_page_datas()

        # Persist the province rows.
        self.db.save_province_datas(all_provinces)


# Run the job only when executed as a script, so that merely loading this code
# does not open a database connection or hit the network.
if __name__ == '__main__':
    ds = DataService()
    ds.process_data()
diff --git a/国内各省份以及地级市疫情数据.txt b/国内各省份以及地级市疫情数据.txt deleted file mode 100644 index 2039b97..0000000 --- a/国内各省份以及地级市疫情数据.txt +++ /dev/null @@ -1,120 +0,0 @@ -class Province: - def __init__(self): - self.confirmed = 0# - self.died = 0 - self.crued = 0 - self.confirmedRelative = 0 - self.diedRelative = 0 - self.curedRelative = 0 - self.asymptomaticRelative = 0 - self.asymptomatic = 0 - self.curConfirm = 0 - self.curConfirmRelative = 0 - self.area = '' - self.pub_date = '' - self.subList = [] - - def __str__(self): - return '%s\n省份: %s\n累计确诊: %s(较昨日: %+d)\n累计死亡: %d(较昨日: %+d)\n\ -累计治愈: %d(较昨日: %+d)\n现有确诊: %d(较昨日: %+d) \n无症状感染者: %d(较昨日: %+d)\n'% (self.pub_date,self.area, self.confirmed,self.confirmedRelative, - self.died,self.diedRelative,self.crued, self.curedRelative, - self.curConfirm,self.curConfirmRelative,self.asymptomatic,self.asymptomaticRelative ) - -class City(): - def __init__(self): - self.city = '' - self.confirmed = 0 - self.died = 0 - self.crued = 0 -# self.confirmedRelative = 0 - self.curConfirm = 0 - - def __str__(self): - return '%s\n累计确诊: %d\n累计死亡: %d\n累计治愈: \ -%d\n现存确诊: %d'% (self.city,self.confirmed, self.died, - self.crued,self.curConfirm) - -import requests -from bs4 import BeautifulSoup -import re -import json - -def getOrElse(a,key): - ret = '' - if a.get(key) != None: - ret = a[key] - else: - ret = '' - return ret - -def processStr(s): - ret = '' - if s==None or s == '': - ret = '0' - else: - ret = s - return ret - -def Print(a): - for p in a: - print(p) - if len(p.subList) != 0: - print('- - - - - - - - - - - - - - - - -' * 4) - print('省内各地级市:\n') - else: - pass - for c in p.subList: - print(c) - print() - print('---------------------------------' * 4) - - -res = requests.get('https://voice.baidu.com/act/newpneumonia/newpneumonia/') - -res = res.text - -soup = BeautifulSoup(res,'html.parser') - -tag = soup.find('script',attrs={'id':'captain-config'}) - -tagstr = tag.string - -tagjson = json.loads(tagstr) - -dateinfos = tagjson['component'] - 
class MyDB:
    """Thin MySQL persistence layer for the daily province statistics."""

    def __init__(self, host, user, passwd, db):
        """Open a connection and a cursor.

        Args:
            host: MySQL server host name.
            user: Login user.
            passwd: Login password.
            db: Name of the database to use.
        """
        import pymysql

        # Define the attributes first so __del__ stays safe if connect() raises.
        self.conn = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
        self.conn = pymysql.connect(host=host, user=user, password=passwd, database=db)
        self.cursor = self.conn.cursor()

    def get_province_list_tuple(self, all_provinces):
        """Return one row tuple per province, in insert-column order."""
        return [item.get_info_tuple() for item in all_provinces]

    def save_province_datas(self, all_provinces):
        """Replace the stored rows of the current publication day with *all_provinces*.

        The day is taken from the first ten characters of the first province's
        ``pub_date``. Existing rows of that day are deleted and the new rows
        inserted in a single transaction, so a failed insert no longer leaves
        the day without data. Errors are printed, not raised.

        Args:
            all_provinces: Sequence of objects exposing ``pub_date`` and
                ``get_info_tuple()``. An empty sequence is a no-op.
        """
        if not all_provinces:
            print('+++ save_province_datas, data len: 0')
            return

        curdate = all_provinces[0].pub_date
        # Parameterised statement: the value is escaped by the driver instead
        # of being spliced into the SQL text.
        delete_sql = 'delete from province_daily_datas where pub_date like %s'
        insert_sql = (
            'insert into province_daily_datas('
            'curConfirm,curConfirmRelative,confirmed,confirmedRelative,'
            'died,diedRelative,crued,curedRelative,area,'
            'asymptomatic,asymptomaticRelative,pub_date) '
            'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        rows = self.get_province_list_tuple(all_provinces)

        print('+++ save_province_datas, data len: %d' % len(rows))
        try:
            self.cursor.execute(delete_sql, (curdate[:10] + '%',))
            self.cursor.executemany(insert_sql, rows)
            self.conn.commit()
        except Exception as e:
            # Undo the delete as well, then report (best-effort, as before).
            self.conn.rollback()
            print(e)
        print('+++ save_province_datas is over.')

    def __del__(self):
        # getattr: the attribute may be absent if __init__ never ran to completion.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()


# Prefecture-level city entity.
class City:
    """Epidemic figures of one city at one publication time."""

    def __init__(self):
        self.city = ''
        self.confirmed = 0
        self.died = 0
        self.crued = 0
        self.confirmedRelative = 0
        self.curConfirm = 0
        self.pub_date = ''
        self.province = ''

    def __str__(self):
        return (
            'city: %s,confirmed: %d,died: %d,crued: %d,'
            'confirmedRelative: %d,curConfirm: %d,pub_date:%s'
        ) % (
            self.city,
            self.confirmed,
            self.died,
            self.crued,
            self.confirmedRelative,
            self.curConfirm,
            self.pub_date,
        )


# Domestic province entity.
class Province:
    """Epidemic figures of one province plus its cities (``subList``)."""

    def __init__(self):
        self.confirmed = 0
        self.died = 0
        self.crued = 0
        self.relativeTime = 0
        self.confirmedRelative = 0
        self.diedRelative = 0
        self.curedRelative = 0
        self.asymptomaticRelative = 0
        self.asymptomatic = 0
        self.curConfirm = 0
        self.curConfirmRelative = 0
        self.icuDisable = 0
        self.area = ''
        self.pub_date = ''
        self.subList = []

    def __str__(self):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked source indentation into the output.
        return (
            'area: %s, confirmed:%s, died: %d, crued: %d, relativeTime: %d, '
            'confirmedRelative: %d, diedRelative: %d, curedRelative: %d, '
            'asymptomaticRelative: %d, asymptomatic: %d, curConfirm: %d, '
            'curConfirmRelative: %d, icuDisable: %d, pub_date: %s'
        ) % (
            self.area,
            self.confirmed,
            self.died,
            self.crued,
            self.relativeTime,
            self.confirmedRelative,
            self.diedRelative,
            self.curedRelative,
            self.asymptomaticRelative,
            self.asymptomatic,
            self.curConfirm,
            self.curConfirmRelative,
            self.icuDisable,
            self.pub_date,
        )

    def get_info_tuple(self):
        """Return the row tuple, ordered like the insert column list in ``MyDB.save_province_datas``."""
        return (
            self.curConfirm,
            self.curConfirmRelative,
            self.confirmed,
            self.confirmedRelative,
            self.died,
            self.diedRelative,
            self.crued,
            self.curedRelative,
            self.area,
            self.asymptomatic,
            self.asymptomaticRelative,
            self.pub_date,
        )