# Recovered from git patch 515c6878279a92418dafbd6d7dc7a63e37a8d0ae
# (2021-11-03, subject: "data processing, return a txt file"), which adds
# the new file 002文件处理.py with the contents below.
#
# @Time    : 2021/11/1 22:47
# @Author  : wenkaic
# @File    : 002文件处理 ("002 file processing")
# @Project : python爬虫 ("python web crawler")

import re

# Source and destination used when the module is run as a script.
INPUT_PATH = '003斗破苍穹.json'
OUTPUT_PATH = '004斗破苍穹.txt'

# Matches every character that must be REMOVED, i.e. anything that is not:
#   * a CJK ideograph in U+4E00..U+9FA5,
#   * a newline,
#   * one of the full-width punctuation marks , : 。 !
#   * a literal backslash.
# NOTE(review): the doubled backslashes inside the class whitelist the
# backslash character itself; that may be unintended, but the pattern is kept
# byte-for-byte so the produced text does not change.
_NON_CHINESE = re.compile(r'[^\u4e00-\u9fa5\n\\,\\:\\。\\!]')

# Runs of consecutive newlines, collapsed to a single newline.
_NEWLINE_RUN = re.compile('\n+')


def find_chinese(file: str) -> str:
    """Reduce *file* to its Chinese text and tidy chapter headings.

    Despite its name, ``file`` is the text content to clean, not a path or a
    file object.

    Processing steps, in order:

    1. Delete every character matched by ``_NON_CHINESE``.
    2. Collapse each run of newlines into one newline.
    3. Append a space after every ``章`` and prepend a space before every
       ``第``.

    The cleaned text is printed to stdout and also returned.

    Args:
        file: Raw text to clean.

    Returns:
        The cleaned text.
    """
    chinese = _NON_CHINESE.sub('', file)
    chinese = _NEWLINE_RUN.sub('\n', chinese)
    # Plain single-character substitutions: str.replace is equivalent to the
    # original re.sub calls and needs no regex.
    chinese = chinese.replace('章', '章 ')
    chinese = chinese.replace('第', ' 第')

    print(chinese)
    return chinese


def main() -> None:
    """Read ``INPUT_PATH``, clean it with ``find_chinese``, write ``OUTPUT_PATH``."""
    # Context managers guarantee both handles are closed, even on error
    # (the original never closed the input file).
    with open(INPUT_PATH, 'r', encoding='utf-8') as src:
        content = src.read()

    cleaned = find_chinese(content)

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as dst:
        dst.write(cleaned)


if __name__ == '__main__':
    main()