You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

112 lines
3.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import os
from collections import Counter

import jieba
def Create_path():
    """Create a ``Result`` directory under the current working directory.

    According to the original notes, the directory is meant to hold
    Jieba.txt, Sort.txt and the word-cloud image.

    :return: path of the ``Result`` directory, ending with a path separator,
        or ``None`` if the directory could not be created (the error is
        printed in that case)
    """
    try:
        # os.path.join uses the platform's separator instead of a hard-coded
        # backslash; the trailing separator is kept so callers can still
        # append a file name directly to the returned string.
        newPath = os.path.join(os.getcwd(), 'Result') + os.sep
        # exist_ok makes the call a no-op when the directory already exists.
        os.makedirs(newPath, exist_ok=True)
    except OSError as msg:
        # str() is required here: concatenating the exception object itself
        # raises TypeError and would hide the real failure.
        print("新建目录失败:" + str(msg))
        return None
    return newPath
def Jieba(fileName, filePath):
    """Segment a text file with jieba and save the result to Jieba.txt.

    :param fileName: name (path) of the input file, read as UTF-8
    :param filePath: directory in which Jieba.txt is written; it becomes the
        process's current working directory as a side effect
    :return: None; the segmented, space-joined text is written to Jieba.txt
    """
    with open(fileName, 'r', encoding='utf-8') as f:
        text = f.read()
    result = " ".join(jieba.cut(text))

    # Clean-up pass: each search string below is replaced by a single space.
    # NOTE(review): several search strings look empty or like a plain space
    # in this view; they presumably contain invisible / ambiguous Unicode
    # characters. Confirm against the raw file before touching these lines
    # (a truly empty search string would insert a space between every
    # character).
    result = result.replace(' ', ' ')
    result = result.replace(' ', ' ')
    result = result.replace(' ', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace(' - ', ' ')
    result = result.replace(' , ', ' ')
    result = result.replace(' . ', ' ')
    result = result.replace(' ', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace('', ' ')
    result = result.replace(' ', ' ')
    result = result.replace(' ', ' ')
    result = result.replace(' ', ' ')

    # Change the current working directory. This is kept because Turn() and
    # Sort() in this file open Jieba.txt / Sort.txt by relative name.
    os.chdir(filePath)
    # A context manager closes the file even if the write fails.
    with open('Jieba.txt', 'w', encoding='utf-8') as fp:
        fp.write(result)
def Turn():
    """Read Jieba.txt and flatten its space-separated words into one list.

    The file is opened by relative name, so it is looked up in the current
    working directory.

    :return: list of words (wordList) in file order, without empty entries
    """
    wordList = []
    with open('Jieba.txt', 'r', encoding='utf-8') as f:
        # Iterate the file directly instead of materialising readlines().
        for line in f:
            # Splitting on a single space yields '' for blank lines and for
            # runs of spaces; drop those so they are not counted as words.
            wordList.extend(word for word in line.strip().split(' ') if word)
    return wordList
def Account(wordList):
    """Count how often each word occurs in wordList.

    :param wordList: list of words to count
    :return: plain dict mapping each word to its number of occurrences,
        keyed in order of first appearance
    """
    # Counter tallies in a single pass; the previous list.count() call per
    # element rescanned the whole list each time (quadratic in its length).
    # Converted back to a plain dict so the return type stays the same.
    return dict(Counter(wordList))
def Sort(accountDict):
    """Sort word counts in descending order, print them and save to Sort.txt.

    Sort.txt is opened by relative name, so it is written to the current
    working directory.

    :param accountDict: dict of word -> count before sorting
    :return: dict with the same items ordered by descending count (words
        with equal counts keep their original relative order)
    """
    # sorted() is stable, so ties stay in the input dict's order.
    sortDict = dict(
        sorted(accountDict.items(), key=lambda item: item[1], reverse=True)
    )
    Print_sortDict(sortDict)

    # The file stores the dict's string form with single quotes removed.
    clearStr = str(sortDict).replace('\'', '')
    # A context manager closes the file even if the write fails.
    with open('Sort.txt', 'w', encoding='utf-8') as fp:
        fp.write(clearStr)

    # The documented contract is to return the sorted dict; previously the
    # function fell off the end and returned None.
    return sortDict
def Print_sortDict(sortDict):
    """Print the sorted word counts, one ``('word',count)`` line per entry.

    A header line is printed first, preceded by a blank line.

    :param sortDict: dict of word -> count, already sorted
    """
    print('\n======统计结果:======')
    # Every entry is printed; the former manual counter compared against
    # len(sortDict) and could never reach its break branch.
    for word, count in sortDict.items():
        print('(\'%s\',%s)' % (word, count))