"""Pick a text file from a directory tree, then plot its most frequent
words as a bar chart and render a word cloud from its contents."""

from collections import Counter
from os import listdir
from os.path import join, isfile, isdir

import jieba  # jieba word segmentation
import matplotlib.pyplot as plt  # plotting module
import numpy as np
from matplotlib.font_manager import FontProperties
from pylab import mpl
from wordcloud import WordCloud

# Font file used for every piece of Chinese text that gets drawn.
FONT_PATH = r"FZYTK.TTF"
font = FontProperties(fname=FONT_PATH, size=14)


class ChooseDir:
    """Recursively list the files under a directory and let the user pick one.

    Every file found is printed with a 1-based number when the instance is
    created; ``choose_path`` then asks for one of those numbers.
    """

    def __init__(self, director):
        """Walk ``director`` and record (and print) every file below it."""
        self.__count = 0
        self.__path = []
        self.__listDir(director)

    def __listDir(self, director):
        """Print and store each file in ``director``, descending into subdirectories."""
        for subPath in listdir(director):
            path = join(director, subPath)
            if isfile(path):
                self.__count += 1
                print(self.__count, '---', path)
                self.__path.append(path)
            elif isdir(path):
                self.__listDir(path)

    def choose_path(self):
        """Ask for a file number on stdin and return the matching path.

        The prompt repeats until a whole number between 1 and the number of
        listed files is entered.

        Raises:
            IndexError: if no files were found, so there is nothing to pick.
        """
        if not self.__path:
            raise IndexError('no files were found to choose from')

        total = len(self.__path)
        while True:
            answer = input('Select a path: ')
            try:
                number = int(answer)
            except ValueError:
                print('please enter a whole number')
                continue
            # Reject 0 and negatives explicitly: after the "- 1" below they
            # would silently index from the end of the list.
            if 1 <= number <= total:
                break
            print('please enter a number between 1 and', total)

        selected_path = self.__path[number - 1]
        print('the path you selecting: ', selected_path)
        return selected_path


class Myword:
    """Word-frequency chart and word cloud for a single UTF-8 text file."""

    # Punctuation, whitespace and filler words left out of the frequency chart.
    _STOP_TOKENS = frozenset([
        ',', '、', '。', '的', '和', '\u3000', '图', '串', '“', '”', ' ',
        '与', '是', '端', '在', '中', '了', '\n',
    ])

    def __init__(self, url):
        """Remember ``url``, the path of the text file to analyse."""
        self.url = url

    def word_cloud(self, top_n=20):
        """Show and save a keyword bar chart and a word cloud for the file.

        The bar chart is saved via ``plt.savefig('输出词频图标')`` and the
        word cloud is written to ``词云图.png``; both are also displayed
        with ``plt.show()``.

        Args:
            top_n: maximum number of keywords shown in the bar chart.
        """
        # Use a context manager so the file handle is closed promptly.
        with open(self.url, 'r', encoding='utf8') as f:
            text = f.read()

        # jieba.cut yields tokens lazily; materialise them once so the same
        # segmentation feeds both the counter and the word cloud.
        tokens = list(jieba.cut(text))

        # Segment with jieba and join into a string; wordcloud cannot generate
        # a correct Chinese word cloud directly from unsegmented text.
        cut_text = " ".join(tokens)

        frequencies = self._top_keywords(tokens, top_n)
        print(frequencies)

        self._draw_bar_chart(frequencies)
        self._draw_cloud(cut_text)

    def _top_keywords(self, tokens, top_n):
        """Return the ``top_n`` most frequent non-stop tokens as {token: count}."""
        # Drop stop tokens BEFORE ranking; otherwise punctuation and
        # particles occupy most of the top slots and few keywords remain.
        counts = Counter(t for t in tokens if t not in self._STOP_TOKENS)
        return dict(counts.most_common(top_n))

    def _draw_bar_chart(self, frequencies):
        """Draw, save and show a horizontal bar chart of keyword counts."""
        label = list(frequencies.keys())
        y = list(frequencies.values())
        idx = np.arange(len(y))

        bars = plt.barh(idx, y)
        plt.bar_label(bars)
        # barh centres each bar on its y position by default, so the tick
        # labels belong at idx itself rather than at an offset.
        plt.yticks(idx, label, fontproperties=font)
        plt.xlabel('出现次数', fontsize=20, labelpad=5, fontproperties=font)
        plt.ylabel('关键词', fontsize=20, labelpad=5, fontproperties=font)
        plt.savefig('输出词频图标')
        plt.show()

    def _draw_cloud(self, cut_text):
        """Render, show and save a word cloud built from the segmented text."""
        wordcloud = WordCloud(
            # Set a font, otherwise Chinese characters render as garbled
            # boxes; the path can be swapped for any other font file.
            font_path=FONT_PATH,
            # Background colour, width and height of the image.
            background_color="white",
            width=1000,
            height=880,
        ).generate(cut_text)

        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
        wordcloud.to_file("词云图.png")


if __name__ == '__main__':
    input_dir = input('input the absolute path of dir: ')
    choose_dir = ChooseDir(input_dir)
    url = choose_dir.choose_path()
    s = Myword(url)
    s.word_cloud()