import json
import re
import time

import pymysql
import requests
from bs4 import BeautifulSoup

# Page scraped by getouside(); the statistics are embedded in it as JSON.
_SOURCE_URL = 'https://voice.baidu.com/act/newpneumonia/newpneumonia'
# Seconds to wait for the HTTP response, so a stalled server cannot hang us.
_REQUEST_TIMEOUT = 10


class Outside:
    """Statistics for one entry of the scraped ``caseOutsideList``.

    Instances are filled in by ``getouside()`` and can be written to the
    ``outsideCountry`` table with ``insert_sql()``.
    """

    def __init__(self):
        self.confirmed = 0          # cumulative confirmed cases
        self.died = 0               # deaths
        self.crued = 0              # cured (attribute name kept as-is)
        self.relativeTime = 0       # report date, 'YYYY-MM-DD' once scraped
        self.confirmedRelative = 0  # newly confirmed cases
        self.curConfirm = 0         # currently confirmed cases
        self.icuDisable = 0         # never populated by the scraper; stays 0
        self.area = ''              # region name
        self.subList = []           # City objects belonging to this region

    def __str__(self):
        return '地区: %s\t累计确诊:%s \t死亡: %d \t治愈: %d \t时间: %s \t新增: %s\t 现有: %d' % (
            self.area,
            self.confirmed,
            self.died,
            self.crued,
            self.relativeTime,
            self.confirmedRelative,
            self.curConfirm,
        )

    def get_db_tuple(self):
        """Return the field values in the column order used by insert_sql()."""
        return (
            self.confirmed,
            self.died,
            self.crued,
            self.relativeTime,
            self.confirmedRelative,
            self.curConfirm,
            self.icuDisable,
            self.area,
        )

    def delete_sql(self, con):
        """Delete this region's existing row for ``relativeTime``.

        Errors are printed rather than raised, and nothing is committed
        here; the caller decides when to commit.

        Args:
            con: An open DB-API connection (the ``%s`` placeholders match
                the paramstyle of pymysql).
        """
        # Parameterized so that scraped text (e.g. a name containing a
        # quote) can neither break the statement nor inject SQL.
        sql = (
            'delete from outsideCountry '
            'where area=%s and relativeTime like %s'
        )
        try:
            with con.cursor() as cursor:
                cursor.execute(sql, (self.area, str(self.relativeTime)))
        except Exception as e:
            print(e)
            print('delete outside country failed.')
        else:
            print('delete outside country successfully')

    def insert_sql(self, con):
        """Replace this region's row: delete the old one, insert, commit.

        Errors are printed rather than raised. On failure the transaction
        is rolled back so the pending delete is not committed by a later
        successful insert on the same connection.

        Args:
            con: An open DB-API connection (see ``delete_sql``).
        """
        self.delete_sql(con)
        sql = """insert into outsideCountry(
            confirmed,died,crued,relativeTime,
            confirmedRelative,curConfirm,icuDisable,area)
            values(%s,%s,%s,%s,%s,%s,%s,%s)
        """
        try:
            with con.cursor() as cursor:
                cursor.execute(sql, self.get_db_tuple())
            con.commit()
        except Exception as e:
            print(e)
            print('Insert outsideCountry failed.')
            try:
                con.rollback()
            except Exception as rollback_error:
                # Stay best-effort: a broken connection must not raise here.
                print(rollback_error)
        else:
            print('Inserted outsideCountry successfully')


class City:
    """Statistics for one sub-entry (``subList`` item) of a region."""

    def __init__(self):
        self.confirmed = 0   # cumulative confirmed cases
        self.died = 0        # deaths
        self.crued = 0       # cured (attribute name kept as-is)
        self.curConfirm = 0  # currently confirmed cases
        self.city = ''       # city name

    def __str__(self):
        return '城市:%s\t累计确诊:%d\t死亡:%d\t治愈:%d\t现有:%d' % (
            self.city,
            self.confirmed,
            self.died,
            self.crued,
            self.curConfirm,
        )


def getOrElse(target, key):
    """Return ``target[key]``, or ``''`` when the key is missing or None."""
    value = target.get(key)
    return '' if value is None else value


def processStr(s):
    """Return ``s`` unchanged, or ``"0"`` when it is None or empty."""
    if s is None or s == '':
        return "0"
    return s


def _to_int(record, key):
    """Read ``record[key]`` as an int; missing, None or '' counts as 0."""
    return int(processStr(getOrElse(record, key)))


def _parse_city(cityitem):
    """Build a City from one ``subList`` dict of the scraped payload."""
    city = City()
    city.city = getOrElse(cityitem, 'city')
    city.confirmed = _to_int(cityitem, 'confirmed')
    city.died = _to_int(cityitem, 'died')
    city.crued = _to_int(cityitem, 'crued')
    city.curConfirm = _to_int(cityitem, 'curConfirm')
    return city


def _parse_outside(item):
    """Build an Outside (with its cities) from one ``caseOutsideList`` dict."""
    outside = Outside()
    outside.confirmed = _to_int(item, 'confirmed')
    outside.died = _to_int(item, 'died')
    outside.crued = _to_int(item, 'crued')
    # relativeTime stays strict: a missing timestamp must not silently
    # become the epoch date.
    outside.relativeTime = time.strftime(
        "%Y-%m-%d", time.localtime(int(item['relativeTime'])))
    outside.confirmedRelative = _to_int(item, 'confirmedRelative')
    outside.curConfirm = _to_int(item, 'curConfirm')
    outside.area = item['area']
    # icuDisable is not read from the payload; it keeps its default of 0.
    for cityitem in item.get('subList') or []:
        outside.subList.append(_parse_city(cityitem))
    return outside


# Print information
def print_all_infos(infos):
    """Print every region, its cities, and a separator line.

    Args:
        infos: Iterable of Outside objects.
    """
    for p in infos:
        print(p)
        for c in p.subList:
            print(c)
        # NOTE(review): the original indentation was lost; the separator is
        # assumed to follow each region's cities - confirm.
        print('+++++++++++++++++++++++++++++++++' * 3)


# Crawl the page
def getouside():
    """Fetch the source page and parse its ``caseOutsideList`` section.

    Returns:
        A list of Outside objects, each with its ``subList`` of City
        objects filled in.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        ValueError: If the page lacks the embedded ``captain-config``
            script that carries the data.
    """
    res = requests.get(_SOURCE_URL, timeout=_REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    tag = soup.find('script', attrs={'id': 'captain-config'})
    if tag is None or not tag.string:
        raise ValueError(
            "script tag 'captain-config' not found in %s" % _SOURCE_URL)
    tagJson = json.loads(tag.string)
    outsideDataInfos = tagJson['component'][0]['caseOutsideList']
    return [_parse_outside(item) for item in outsideDataInfos]


def printStart():
    """Scrape the current data and print it to stdout."""
    print_all_infos(getouside())


def mysqlStart(con):
    """Scrape the current data and store every region through ``con``.

    Args:
        con: An open DB-API connection passed to ``Outside.insert_sql``.
    """
    for c in getouside():
        c.insert_sql(con)