|
|
from collections import Counter
|
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
import matplotlib.pyplot as plt # 绘制图像的模块
|
|
|
import jieba # jieba分词
|
|
|
import numpy as np
|
|
|
from pylab import mpl
|
|
|
from os import listdir
|
|
|
from os.path import join, isfile, isdir
|
|
|
|
|
|
from matplotlib.font_manager import FontProperties
|
|
|
# Shared font used for the bar chart's tick and axis labels (which are Chinese
# text). The file name is relative, so it is resolved against the current
# working directory.
font = FontProperties(
    fname="FZYTK.TTF",
    size=14,
)
|
|
|
|
|
|
class ChooseDir:
    """List every file under a directory tree and let the user pick one by number.

    Files are numbered from 1 in the order they are discovered, and each one
    is printed together with its number while the tree is being scanned.
    """

    def __init__(self, director):
        """Scan ``director`` recursively and record every file found.

        Args:
            director: Path of the directory to scan.
        """
        self.__count = 0  # number of files found so far (the last menu number)
        self.__path = []  # file paths; menu number k lives at index k - 1
        self.__listDir(director)

    def __listDir(self, director):
        """Record and print the files in ``director``, descending into sub-directories.

        Args:
            director: Path of the directory to walk.
        """
        # listdir() returns entries in arbitrary order; sorting keeps the menu
        # numbering stable from one run to the next.
        for subPath in sorted(listdir(director)):
            path = join(director, subPath)
            if isfile(path):
                self.__count += 1
                print(self.__count, '---', path)
                self.__path.append(path)
            elif isdir(path):
                self.__listDir(path)

    def _path_at(self, number):
        """Return the recorded path for the 1-based menu ``number``.

        Args:
            number: Menu number as printed during the scan (first file is 1).

        Returns:
            The file path stored for that number.

        Raises:
            IndexError: If ``number`` is not between 1 and the number of files.
        """
        # Explicit range check: plain list indexing would accept 0 and negative
        # numbers and silently return a file counted from the end of the list.
        if not 1 <= number <= len(self.__path):
            raise IndexError(
                'selection {} is outside 1..{}'.format(number, len(self.__path))
            )
        return self.__path[number - 1]

    def choose_path(self):
        """Prompt until the user enters a valid file number and return its path.

        Returns:
            The path of the selected file.

        Raises:
            IndexError: If the scan found no files, so nothing can be selected.
        """
        if not self.__path:
            # Fail before prompting; otherwise no answer could ever be valid.
            raise IndexError('no files were found to choose from')

        while True:
            answer = input('Select a path: ')
            try:
                selected_path = self._path_at(int(answer))
            except (ValueError, IndexError):
                # Non-numeric or out-of-range answer: explain and ask again.
                print('invalid selection, enter a number between 1 and',
                      len(self.__path))
                continue
            print('the path you selecting: ', selected_path)
            return selected_path
|
|
|
|
|
|
|
|
|
class Myword:
    """Draw a keyword-frequency bar chart and a word cloud for one text file."""

    # Tokens excluded from the frequency chart: punctuation, whitespace and
    # very common single-character words.
    _STOP_TOKENS = frozenset([
        ',', '、', '。', '的', '和', '\u3000', '图', '串', '“', '”', ' ', '与',
        '是', '端', '在', '中', '了', '\n',
    ])

    def __init__(self, url):
        """Remember which file to analyse.

        Args:
            url: Path of the text file; ``word_cloud`` reads it as UTF-8.
        """
        self.url = url

    @classmethod
    def _top_keywords(cls, tokens, limit=20):
        """Count tokens and return the most frequent ones that are not stop tokens.

        Stop tokens are removed *before* ranking, so up to ``limit`` real
        keywords are returned instead of whatever is left of the top ``limit``
        after punctuation and filler words have been discarded.

        Args:
            tokens: Iterable of token strings.
            limit: Maximum number of keywords to return.

        Returns:
            A dict mapping keyword to count, ordered from most to least frequent.
        """
        counts = Counter(
            token for token in tokens if token not in cls._STOP_TOKENS
        )
        return dict(counts.most_common(limit))

    def word_cloud(self):
        """Segment the file with jieba, then show and save both charts.

        Prints the keyword counts, saves the bar chart as '输出词频图标' and the
        word cloud as '词云图.png', and displays each figure with ``plt.show()``.
        """
        # Context manager so the file handle is closed as soon as it is read.
        with open(self.url, 'r', encoding='utf8') as handle:
            text = handle.read()

        # Segment once and reuse the tokens for both outputs. WordCloud cannot
        # produce a correct Chinese word cloud from raw text, so it is given
        # the tokens joined by spaces.
        tokens = list(jieba.cut(text))
        cut_text = " ".join(tokens)

        d = self._top_keywords(tokens)
        print(d)

        label = list(d)
        y = list(d.values())
        idx = np.arange(len(y))
        barh = plt.barh(idx, y)
        plt.bar_label(barh)
        # barh() centres each bar on its idx value, so the tick labels go at
        # idx itself; an offset would shift them away from their bars.
        plt.yticks(idx, label, fontproperties=font)
        plt.xlabel('出现次数', fontsize=20, labelpad=5, fontproperties=font)
        plt.ylabel('关键词', fontsize=20, labelpad=5, fontproperties=font)
        plt.savefig('输出词频图标')
        plt.show()

        wordcloud = WordCloud(
            # A font must be set, otherwise Chinese glyphs render as empty
            # boxes; the path can be swapped for any other suitable font file.
            font_path="FZYTK.TTF",
            # Background colour and canvas size.
            background_color="white", width=1000, height=880).generate(cut_text)

        # Start a fresh figure so the image is never drawn on top of the bar
        # chart when plt.show() above did not block and close that figure.
        plt.figure()
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
        wordcloud.to_file("词云图.png")
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Ask for a root directory, list the files below it, let the user pick
    # one by number, then chart that file.
    root_dir = input('input the absolute path of dir: ')
    chosen_file = ChooseDir(root_dir).choose_path()
    Myword(chosen_file).word_cloud()