You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.9 KiB

3 years ago
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt # 绘制图像的模块
import jieba # jieba分词
import numpy as np
from pylab import mpl
from os import listdir
from os.path import join, isfile, isdir
from matplotlib.font_manager import FontProperties
# Font handed to matplotlib for the chart's tick and axis labels (which are
# Chinese text). "FZYTK.TTF" is given as a relative path with no directory.
font = FontProperties(fname=r"FZYTK.TTF", size=14)
class ChooseDir:
    """Collect every file under a directory tree and let the user pick one.

    On construction the tree rooted at ``director`` is walked recursively.
    Each regular file found is printed with a 1-based number and remembered,
    so that :meth:`choose_path` can map a number typed by the user back to
    the corresponding path.
    """

    def __init__(self, director):
        """Walk ``director`` and number every file found beneath it.

        Args:
            director: Path of the directory to scan.
        """
        self.__count = 0
        self.__path = []
        self.__listDir(director)

    def __listDir(self, director):
        """Recursively record and print the files under ``director``."""
        # listdir() returns entries in arbitrary order; sorting keeps the
        # numbering shown to the user stable from one run to the next.
        for subPath in sorted(listdir(director)):
            path = join(director, subPath)
            if isfile(path):
                self.__count += 1
                print(self.__count, '---', path)
                self.__path.append(path)
            elif isdir(path):
                self.__listDir(path)

    def choose_path(self):
        """Prompt for one of the printed numbers and return the matching path.

        Returns:
            The path of the selected file.

        Raises:
            ValueError: If the typed text is not an integer.
            IndexError: If the number is outside ``1..<number of files>``.
        """
        selected_path = int(input('Select a path: '))
        # Reject 0 and negatives explicitly: plain list indexing would wrap
        # around and silently return a file counted from the end of the list.
        if not 1 <= selected_path <= len(self.__path):
            raise IndexError(
                f'selection {selected_path} is out of range '
                f'(1..{len(self.__path)})'
            )
        selected_path = self.__path[selected_path - 1]
        print('the path you selecting: ', selected_path)
        return selected_path
class Myword:
    """Draw a word-frequency bar chart and a word cloud for one text file."""

    # Tokens that are never shown in the frequency chart.
    # NOTE(review): the list this replaces repeated '' many times; those
    # entries were presumably punctuation marks that got lost somewhere --
    # confirm against the intended stop list and add them back here.
    _IGNORED_TOKENS = frozenset(('', '\u3000', ' ', '\n'))

    def __init__(self, url):
        """Remember the path of the text file to analyse.

        Args:
            url: Path of the text file; it is read as UTF-8.
        """
        self.url = url

    def word_cloud(self):
        """Segment the file with jieba, then plot word counts and a word cloud.

        Prints the counted words, saves the bar chart as '输出词频图标' and
        the word cloud as '词云图.png' (both relative paths), and shows each
        figure with ``plt.show()``.
        """
        # The context manager closes the file even if reading fails.
        with open(self.url, 'r', encoding='utf8') as source:
            text = source.read()
        # Segment once and reuse the tokens for both outputs.
        tokens = list(jieba.cut(text))

        frequencies = self._top_words(tokens)
        print(frequencies)
        self._plot_frequencies(frequencies)
        # WordCloud cannot split Chinese text into words by itself, so it is
        # given the jieba tokens joined with spaces.
        self._render_cloud(" ".join(tokens))

    @classmethod
    def _top_words(cls, tokens, limit=20):
        """Return the ``limit`` most frequent non-ignored tokens as {token: count}."""
        # Ignored tokens are dropped *before* ranking; removing them after
        # taking the top ``limit`` would leave the chart with fewer entries.
        counts = Counter(t for t in tokens if t not in cls._IGNORED_TOKENS)
        return dict(counts.most_common(limit))

    @staticmethod
    def _plot_frequencies(frequencies):
        """Draw, save and show a horizontal bar chart of ``frequencies``."""
        labels = list(frequencies)
        counts = list(frequencies.values())
        positions = np.arange(len(counts))
        plt.figure()
        bars = plt.barh(positions, counts)
        plt.bar_label(bars)
        # barh centres each bar on its position by default, so the tick for
        # a bar belongs at the position itself, not offset from it.
        plt.yticks(positions, labels, fontproperties=font)
        plt.xlabel('出现次数', fontsize=20, labelpad=5, fontproperties=font)
        plt.ylabel('关键词', fontsize=20, labelpad=5, fontproperties=font)
        plt.savefig('输出词频图标')
        plt.show()

    @staticmethod
    def _render_cloud(spaced_text):
        """Build a word cloud from space-separated words, show it and save it."""
        cloud = WordCloud(
            # A font with CJK glyphs is required, otherwise the words render
            # as empty boxes; point this at any suitable font file.
            font_path="FZYTK.TTF",
            # Background colour and canvas size.
            background_color="white", width=1000, height=880,
        ).generate(spaced_text)
        # Use a fresh figure: when plt.show() does not block (or the previous
        # window stays open) the image would otherwise be drawn on top of the
        # bar chart.
        plt.figure()
        plt.imshow(cloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
        cloud.to_file("词云图.png")
if __name__ == '__main__':
    # Interactive entry point: ask for a directory, list the files found
    # under it, let the user pick one by number, then chart that file.
    root_dir = input('input the absolute path of dir: ')
    chosen_file = ChooseDir(root_dir).choose_path()
    Myword(chosen_file).word_cloud()