|
|
@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
# @Time : 2021/11/1 22:47
|
|
|
|
|
|
|
|
# @Author :wenkaic
|
|
|
|
|
|
|
|
# @File : 002文件处理
|
|
|
|
|
|
|
|
# @Project : python爬虫
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
def find_chinese(file: str) -> str:
    """Strip *file* down to Chinese text and lightly format chapter headings.

    Keeps only CJK ideographs in the range U+4E00..U+9FA5, newlines, and the
    punctuation characters ``,`` ``:`` ``。`` ``!``; everything else is
    removed. Runs of newlines are then collapsed to a single newline, a space
    is appended after every ``章`` and a space is inserted before every ``第``.

    The result is printed to stdout and also returned.

    Args:
        file: The raw text to clean (the text itself, not a path).

    Returns:
        The cleaned text.
    """
    # Inside a character class none of these punctuation marks need escaping.
    # The previous pattern wrote them as r'\\,' etc., which in a raw string
    # put a literal backslash into the keep-set, so backslashes from the
    # input leaked into the output.
    # NOTE(review): full-width punctuation (e.g. U+FF0C) is outside the
    # keep-set and is stripped -- confirm that is intended.
    keep_only = re.compile(r'[^\u4e00-\u9fa5\n,:。!]')

    chinese = keep_only.sub('', file)
    # Removing characters can leave several empty lines in a row.
    chinese = re.sub(r'\n+', '\n', chinese)
    # Plain literal substitutions: no regex needed.
    chinese = chinese.replace('章', '章 ')
    chinese = chinese.replace('第', ' 第')

    print(chinese)
    return chinese
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the raw scraped file as text, reduce it to Chinese text, and save it.
# Context managers guarantee both handles are closed, even if an error occurs
# (the input handle was previously never closed).
with open('003斗破苍穹.json', 'r', encoding='utf-8') as fp:
    content = fp.read()

content1 = find_chinese(content)

with open('004斗破苍穹.txt', 'w', encoding='utf-8') as fp1:
    fp1.write(content1)
|