From c6eb5471bbf1f21c7ac5d99f6041f038771a9da8 Mon Sep 17 00:00:00 2001 From: ppco9heus <3100279293@qq.com> Date: Sun, 24 Apr 2022 10:38:09 +0800 Subject: [PATCH] Delete 'getdata.py' --- getdata.py | 99 ------------------------------------------------------ 1 file changed, 99 deletions(-) delete mode 100644 getdata.py diff --git a/getdata.py b/getdata.py deleted file mode 100644 index bf45d2d..0000000 --- a/getdata.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- -from bs4 import BeautifulSoup -import re -import urllib.request, urllib.error -import csv - -findbd = re.compile(r'(.*?)
', re.S) -finddate = re.compile(r'(\d{1,2}月\d{1,2}日|\d{1,2}月\d{1,2}月)') -findtime = re.compile(r'(\d{1,2}:\d{2}-\d{1,2}:\d{2}|\d{1,2}:\d{2}-\d{1,2}:\d{2}|\d{1,2}:\d{2}-\d{1,2}:\d{2}|\d{1,2}:\d{2}|\d{1,2}:\d{2})') -findevent = re.compile(r'(\d{1,2}:\d{2}-\d{1,2}:\d{2}.*?;|\d{1,2}:\d{2}-\d{1,2}:\d{2}.*?;|\d{1,2}:\d{2}-\d{1,2}:\d{2}.*?;|\d{1,2}:\d{2}.*?;|\d{1,2}:\d{2}.*?;|\d{1,2}:\d{2}-\d{1,2}:\d{2}.*?。|\d{1,2}:\d{2}-\d{1,2}:\d{2}.*?。|\d{1,2}:\d{2}.*?。|\d{1,2}:\d{2}.*?。)') - -def main(): - baseurl = "https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ" - datalist = getData(baseurl) - saveData(datalist) - -def getData(baseurl): - datalist=[] - data=[] - case=[[''],['病例1:'],['病例2:'],['病例3:']] - j=0 - url = baseurl - html = askURL(url) - soup = BeautifulSoup(html, "html.parser") - rich=soup.find_all('div',id="js_content") - rich=str(rich) - bd = re.findall(findbd,rich) - x=len(bd) - for i in range(0,x): - date=re.findall(finddate,bd[i]) - data.append(date) - time=re.findall(findtime,bd[i]) - if len(time)==0: - j=j+1 - if j<=3: - data.append(case[j]) - data.append(time) - event=re.findall(findevent,bd[i]) - x=len(event) - for i in range(0,x): - event[i]=re.sub('-',"",event[i]) - event[i]=re.sub('\d{1,2}',"",event[i]) - event[i]=re.sub(':',"",event[i]) - event[i]=re.sub(':',"",event[i]) - event[i]=re.sub(';',"",event[i]) - event[i]=re.sub('。',"",event[i]) - event[i].replace(" ", "") - data.append(event) - datalist.append(data) - data=[] - return datalist - -def askURL(url): - head = { - "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36" - } - request = urllib.request.Request(url, headers=head) - html = "" - try: - response = urllib.request.urlopen(request) - html = response.read().decode("utf-8") - except urllib.error.URLError as e: - if hasattr(e, "code"): - print(e.code) - if hasattr(e, "reason"): - print(e.reason) - return html - -def saveData(datalist): - x=len(datalist) - tcmp=[] - for i in range(0,x-3): - temp=[] - ab=datalist[i] - j=0 - if ab[3]: - a=ab[0][0] - b=ab[1][0] - c=ab[2] - d=ab[3] - y=len(d) - for item in c: - temp=[] - temp.append(b) - temp.append(a) - temp.append(a) - temp.append(item) - temp.append(d[j]) - if j