# -*- coding: utf-8 -*-
import os
from collections import Counter

import jieba

# Punctuation tokens that are dropped from the segmented text.  Both the
# ASCII and the full-width forms are listed because the input is expected
# to be Chinese text, where either form may appear.
_PUNCTUATION = frozenset({
    ':', ';', '!', '、', '”', '“', '—', '-', ',', '.', '。',
    '》', '《', ')', '(',
    ':', ';', '!', ',', '.', ')', '(',
})


def Create_path():
    '''
    Create a ``Result`` directory under the current working directory.

    Per the original notes, the directory is meant to hold Jieba.txt,
    Sort.txt and the word-cloud image.

    :return: path of the directory, ending with a path separator so that a
             file name can be appended directly; None if creation failed
    '''
    # Joining with a trailing '' yields a trailing separator and uses the
    # platform's own separator instead of a hard-coded backslash.
    newPath = os.path.join(os.getcwd(), 'Result', '')
    try:
        # exist_ok avoids the check-then-create race.
        os.makedirs(newPath, exist_ok=True)
    except OSError as msg:
        # str() is required: concatenating the exception object itself
        # raises TypeError and hides the real error.
        print("新建目录失败:" + str(msg))
        return None
    return newPath


def Jieba(fileName, filePath):
    '''
    Segment a text file with jieba and save the result to Jieba.txt.

    Tokens are joined with single spaces; tokens that are exactly one of
    the punctuation marks in ``_PUNCTUATION`` are dropped.

    Side effect: the process working directory is changed to ``filePath``;
    Jieba.txt is then written there.

    :param fileName: path of the UTF-8 text file to segment
    :param filePath: directory in which Jieba.txt is written
    :return: None
    '''
    with open(fileName, 'r', encoding='utf-8') as f:
        text = f.read()

    # Filtering whole tokens (instead of chained ' x ' replacements on the
    # joined string) also removes adjacent punctuation tokens and
    # punctuation at the very start or end of the text.
    tokens = [token for token in jieba.cut(text) if token not in _PUNCTUATION]
    result = " ".join(tokens)

    os.chdir(filePath)  # switch the current directory to the output one
    with open('Jieba.txt', 'w', encoding='utf-8') as fp:
        fp.write(result)


def Turn():
    '''
    Load Jieba.txt from the current working directory as a flat word list.

    :return: list of the whitespace-separated words, in file order
    '''
    wordList = []
    with open('Jieba.txt', 'r', encoding='utf-8') as f:
        for line in f:
            # split() without arguments discards the empty strings that
            # consecutive spaces or blank lines would otherwise produce.
            wordList.extend(line.split())
    return wordList


def Account(wordList):
    '''
    Count how often each word occurs in ``wordList``.

    :param wordList: list of words to count
    :return: dict mapping word -> count, keyed in first-occurrence order
    '''
    # Counter counts in a single pass; list.count() per word is quadratic.
    return dict(Counter(wordList))


def Sort(accountDict):
    '''
    Sort the frequency dict by count (descending), print it and save it.

    The sorted dict is printed with ``Print_sortDict`` and its ``str()``
    form, with every single quote removed, is written to Sort.txt in the
    current working directory.

    :param accountDict: dict mapping word -> count
    :return: the dict sorted by descending count
    '''
    # sorted() is stable, so words with equal counts keep their order.
    sortDict = dict(
        sorted(accountDict.items(), key=lambda d: d[1], reverse=True)
    )
    Print_sortDict(sortDict)

    clearStr = str(sortDict).replace('\'', '')
    with open('Sort.txt', 'w', encoding='utf-8') as fp:
        fp.write(clearStr)
    return sortDict


def Print_sortDict(sortDict):
    '''
    Print every (word, count) pair of the sorted result.

    :param sortDict: dict mapping word -> count, already sorted
    '''
    print('\n======统计结果:======')
    for word, count in sortDict.items():
        print('(\'%s\',%s)' % (word, count))