# @Time : 2021/11/1 22:47
# @Author :wenkaic
# @File : 002文件处理 (file processing)
# @Project : python爬虫 (Python web crawler)
"""Strip a scraped JSON dump down to its Chinese text and save it as plain text.

Run as a script, this reads ``003斗破苍穹.json``, filters its content with
:func:`find_chinese`, and writes the result to ``004斗破苍穹.txt``.
"""

import re

# Characters to discard: everything except CJK ideographs in the range
# U+4E00..U+9FA5, newlines, and the punctuation marks `,` `:` `。` `!`.
# NOTE(review): the punctuation is written without backslash escapes on
# purpose. Inside a raw-string character class, `\\,` means "a literal
# backslash, then a comma", which would keep stray backslashes (e.g. from
# JSON escape sequences) in the output; that is presumably not wanted.
_DISCARD_PATTERN = re.compile(r'[^\u4e00-\u9fa5\n,:。!]')

# One or more consecutive newlines, collapsed to a single newline.
_NEWLINE_RUN_PATTERN = re.compile(r'\n+')


def find_chinese(file):
    """Return only the Chinese text (plus basic punctuation) found in *file*.

    Despite its name, *file* is the text content to filter, not a path or a
    file object.

    Processing steps, in order:

    1. Remove every character that is not a CJK ideograph
       (U+4E00..U+9FA5), a newline, or one of `,` `:` `。` `!`.
    2. Collapse runs of newlines into a single newline.
    3. Insert a space after every '章' and before every '第'.

    The filtered text is also printed to standard output.

    Args:
        file: The raw text to filter.

    Returns:
        The filtered text as a ``str``.
    """
    chinese = _DISCARD_PATTERN.sub('', file)
    chinese = _NEWLINE_RUN_PATTERN.sub('\n', chinese)
    # Plain string replacement is enough for these fixed, single-character
    # substitutions; no regex is needed.
    chinese = chinese.replace('章', '章 ')
    chinese = chinese.replace('第', ' 第')

    print(chinese)
    return chinese


def main():
    """Read the scraped JSON file, filter it, and write the text file."""
    # Context managers guarantee both handles are closed, even if filtering
    # or writing raises.
    with open('003斗破苍穹.json', 'r', encoding='utf-8') as fp:
        content = fp.read()
    content1 = find_chinese(content)
    with open('004斗破苍穹.txt', 'w', encoding='utf-8') as fp1:
        fp1.write(content1)


if __name__ == '__main__':
    main()