You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

112 lines
3.2 KiB

# -*- coding: utf-8 -*-
import os
from collections import Counter

import jieba
def Create_path():
    '''
    Create a ``Result`` directory under the current working directory.

    According to the original notes, this directory is meant to hold
    Jieba.txt, Sort.txt and the word-cloud image.

    :return: the directory path as a string ending with the platform path
             separator, or None when the directory could not be created
    '''
    try:
        # os.path.join keeps this portable; the trailing separator is kept so
        # the value has the same shape the previous implementation returned
        # on Windows (cwd + "\\Result\\").
        newPath = os.path.join(os.getcwd(), 'Result') + os.sep
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(newPath, exist_ok=True)
    except OSError as msg:
        # str() is required: concatenating the exception object itself raised
        # TypeError inside the handler and hid the real error.
        print("新建目录失败:" + str(msg))
        return None
    return newPath
def Jieba(fileName, filePath):
    '''
    Segment a UTF-8 text file with jieba and save the result as Jieba.txt.

    The tokens produced by ``jieba.cut`` are joined with single spaces,
    space-delimited ``-``, ``,`` and ``.`` tokens are replaced by a single
    space, and the text is written to ``Jieba.txt`` inside *filePath*.

    Side effect: the process working directory is changed to *filePath* and
    is not restored. Other functions in this file open ``Jieba.txt`` and
    ``Sort.txt`` by relative path, so the change is kept on purpose.

    :param fileName: path of the input text file (read as UTF-8)
    :param filePath: directory in which Jieba.txt is written
    :return: None; the result is written to disk
    '''
    with open(fileName, 'r', encoding='utf-8') as f:
        text = f.read()
    result = " ".join(jieba.cut(text))

    # NOTE(review): the previous replace() chain also contained patterns that
    # now read as '' or ' '. An empty pattern makes str.replace insert a
    # space between every character, and ' ' -> ' ' is a no-op, so those
    # calls are not reproduced here. They presumably held characters that
    # were lost when the file was copied - TODO restore them from the
    # upstream source if available.
    for noise in (' - ', ' , ', ' . '):
        result = result.replace(noise, ' ')

    os.chdir(filePath)  # later steps read/write files relative to this directory
    # Context manager guarantees the handle is closed even if write() fails.
    with open('Jieba.txt', 'w', encoding='utf-8') as fp:
        fp.write(result)
def Turn():
    '''
    Load Jieba.txt from the current working directory as a flat word list.

    The file content is split on any run of whitespace, so blank lines and
    consecutive spaces do not produce empty-string entries (the previous
    ``split(' ')`` did, and those entries were then counted as words).

    :return: list of words in file order; an empty list for an empty file
    '''
    with open('Jieba.txt', 'r', encoding='utf-8') as f:
        # Splitting the whole content at once is equivalent to splitting each
        # line and concatenating, because newlines are whitespace too.
        return f.read().split()
def Account(wordList):
    '''
    Count how many times each word occurs in wordList.

    Uses a single pass over the input instead of calling ``list.count`` once
    per element, which was quadratic in the number of words.

    :param wordList: iterable of hashable words to count
    :return: plain dict mapping each distinct word to its count, with keys in
             order of first occurrence
    '''
    # Counter keeps first-seen key order; dict() returns the same concrete
    # type as before.
    return dict(Counter(wordList))
def Sort(accountDict):
    '''
    Order a word-count dict by count (highest first), print it and save it.

    The ordered dict is printed through Print_sortDict, then its ``str()``
    form with every single-quote character removed is written to
    ``Sort.txt`` in the current working directory (UTF-8).

    :param accountDict: dict mapping word -> count
    :return: a new dict with the same items ordered by descending count;
             items with equal counts keep their original relative order
    '''
    ranked = sorted(accountDict.items(), key=lambda pair: pair[1], reverse=True)
    sortDict = dict(ranked)
    Print_sortDict(sortDict)

    # Strip the quotes that repr() puts around string keys.
    clearStr = str(sortDict).replace('\'', '')
    # Context manager guarantees the handle is closed even if write() fails.
    with open('Sort.txt', 'w', encoding='utf-8') as fp:
        fp.write(clearStr)
    # Previously nothing was returned although the docstring promised the
    # sorted dict.
    return sortDict
def Print_sortDict(sortDict):
'''
打印统计排序的结果
:param sortDict: 排序后的字典
'''
i = 0
print('\n======统计结果:======')
for x, y in sortDict.items():
if i < len(sortDict):
print('(\'%s\',%s)'%(x,y))
i += 1
continue
else:
break