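"""Split case-itinerary text into per-case and per-date snippets.

The script downloads one WeChat article page, normalises ``test.txt`` in
place with a series of regex substitutions, and then writes the pieces it
extracts from that file to ``20.txt`` (one case at a time) and ``10.txt``
(one dated entry at a time).
"""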
import csv
import requests
import re, os
from bs4 import BeautifulSoup
# Article to scrape (a page on mp.weixin.qq.com).
url = 'https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Decode the body as UTF-8 explicitly instead of relying on charset detection.
response.encoding = 'utf-8'
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# Elements matching div.rich_media_inner directly under div.rich_media.
ans = soup.select('div.rich_media > div.rich_media_inner ')
# UTF-8 bytes of the first match's text content; raises IndexError when the
# selector matched nothing.
ans1 = ans[0].text.encode()
def openreadtxt(file_name):
    """Read a UTF-8 text file into a list of space-separated token lists.

    Args:
        file_name: Path of the text file to read.

    Returns:
        One list per line of the file, in file order. Each inner list is
        the line split on single space characters, with the line's trailing
        newline removed from the last token. An empty file yields ``[]``.
    """
    # The context manager closes the handle even if reading raises.
    with open(file_name, 'r', encoding='utf-8') as handle:
        # split(' ') rather than split(): consecutive spaces produce empty
        # tokens and a blank line produces [''] instead of [].
        return [line.rstrip('\n').split(' ') for line in handle]
def updateFile(file, old_str, new_str):
    """Rewrite *file* in place, applying a regex substitution to every line.

    Args:
        file: Path of the UTF-8 text file to rewrite.
        old_str: Pattern handed to ``re.sub``; it is interpreted as a
            regular expression, not as a literal string.
        new_str: Replacement handed to ``re.sub``.

    Returns:
        None. The substituted text is written to ``<file>.bak`` first and
        that file is then moved over the original.
    """
    tmp_path = "%s.bak" % file
    with open(file, "r", encoding="utf-8") as src, \
            open(tmp_path, "w", encoding="utf-8") as dst:
        for line in src:
            dst.write(re.sub(old_str, new_str, line))
    # Both handles are closed once the with-block exits. os.replace swaps the
    # new file in as a single step, so there is no moment at which *file*
    # has been deleted but not yet recreated.
    os.replace(tmp_path, file)
# Normalise test.txt in place. Every call rewrites the whole file and later
# patterns see the output of earlier ones, so the order of these calls matters.
# NOTE(review): the pattern in the next two calls is an empty string as
# written. re.sub with an empty pattern matches at every position, so the
# second call would insert ":" between all characters. Possibly a non-ASCII
# or invisible character was lost from these literals; confirm against the
# original file.
updateFile(r"test.txt", "", "")
updateFile(r"test.txt", "", ":")
# Double every hyphen.
updateFile(r"test.txt", "-", "--")
# Drop the phrases "确诊病例" (confirmed case) and "病例轨迹" (case trajectory)
# first, so the only "病例" occurrences left for the next step are the others.
updateFile(r"test.txt", "确诊病例", "")
updateFile(r"test.txt", "病例轨迹", "")
# Prefix each remaining "病例" (case) with the "&&" marker.
updateFile(r"test.txt", "病例", "&&病例")
# Replace this phrase with the literal marker "end".
updateFile(r"test.txt", "呼和浩特市应对新型冠状病毒感染", "end")
# Load the normalised file as a list of per-line token lists.
data = openreadtxt('test.txt')
# The pattern below runs against the *repr* of that nested list, so the
# matched text includes the brackets, quotes and commas that str() adds.
data = str(data)
# One match per case: "病例" + one digit + ":" and everything up to (not
# including) the next "&&" or "end" marker.
# NOTE(review): \d matches a single digit, so a header such as "病例10:" would
# not match; confirm whether two-digit case numbers can occur.
result = re.findall(r"病例\d:.*?(?=&&|end)", data)
# Accumulators; nothing in the code below fills them yet.
name = []
date = []
time = []
through = []
Num = 0
# Handle each extracted case in turn.
# NOTE(review): the nesting below is inferred from the data flow (the inner
# loop consumes getOne, which is computed per case); confirm it matches the
# intended structure.
for case_text in result:
    NUM = 0  # count of dated entries found in this case
    # Round-trip the case text through 20.txt so openreadtxt tokenises it the
    # same way it tokenised test.txt.
    with open('20.txt', 'w', encoding="utf-8") as f:
        f.write(case_text)
    data = openreadtxt("20.txt")
    data = str(data)
    # Case labels ("病例" followed by one digit) present in this chunk.
    name1 = re.findall(r"病例\d", data)
    # Dated entries: "<M>月<D>日", then text up to a run of "。", where that run
    # is immediately followed by the next date or by "病例".
    getOne = re.findall(
        r"\d{1,2}月\d{1,2}?日.*?。+(?=\d{1,2}月\d{1,2}日)|\d{1,2}月\d{1,2}?日.*?。+(?=病例)",
        data,
    )
    numname = 0
    # A distinct loop variable keeps the inner loop from clobbering the
    # outer one.
    for entry in getOne:
        NUM += 1
        # Mode 'w' truncates on open, so 10.txt only ever holds the entry
        # written most recently.
        with open('10.txt', 'w', encoding="utf-8") as f:
            f.write(entry)