You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt # 绘制图像的模块
import jieba # jieba分词
import numpy as np
from pylab import mpl
from os import listdir
from os.path import join, isfile, isdir
from matplotlib.font_manager import FontProperties
# Shared font object built from the FZYTK.TTF font file at size 14; it is
# passed as `fontproperties` to the tick and axis labels of the bar chart.
font = FontProperties(fname=r"FZYTK.TTF", size=14)
class ChooseDir:
    """Recursively list the files under a directory and let the user pick one.

    Every regular file found is printed with a 1-based number while the
    directory tree is scanned; ``choose_path`` then maps a number typed by
    the user back to the corresponding file path.
    """

    def __init__(self, director):
        """Scan ``director`` recursively and remember every file path found.

        Args:
            director: Path of the directory to scan.
        """
        self.__count = 0
        self.__path = []
        self.__listDir(director)

    def __listDir(self, director):
        """Print and record each file below ``director``, recursing into subdirectories."""
        # listdir() returns entries in arbitrary order; sorting keeps the
        # printed numbering stable from one run to the next.
        for subPath in sorted(listdir(director)):
            path = join(director, subPath)
            if isfile(path):
                self.__count += 1
                print(self.__count, '---', path)
                self.__path.append(path)
            elif isdir(path):
                self.__listDir(path)

    def choose_path(self):
        """Prompt for a file number and return the matching path.

        The prompt is repeated until the user enters a whole number between
        1 and the number of files that were listed.

        Returns:
            The path of the selected file.

        Raises:
            IndexError: If the scan found no files, so nothing can be chosen.
        """
        if not self.__path:
            raise IndexError('no files were found to choose from')

        total = len(self.__path)
        while True:
            answer = input('Select a path: ')
            try:
                number = int(answer)
            except ValueError:
                print('please enter a whole number')
                continue
            # 0 and negative numbers must be rejected explicitly: after the
            # "- 1" shift they would be valid negative indexes and silently
            # select a path counted from the end of the list.
            if 1 <= number <= total:
                break
            print('please enter a number between 1 and', total)

        selected_path = self.__path[number - 1]
        print('the path you selecting: ', selected_path)
        return selected_path
class Myword:
    """Draw a word-frequency bar chart and a word cloud for a UTF-8 text file."""

    def __init__(self, url):
        """Remember the path of the text file to analyse.

        Args:
            url: Path of the text file; it is opened with UTF-8 encoding.
        """
        self.url = url

    @staticmethod
    def _top_words(tokens, stop_tokens, limit=20):
        """Return the ``limit`` most frequent tokens as an ordered ``{token: count}`` dict.

        Tokens listed in ``stop_tokens`` are dropped *before* ranking, so they
        cannot use up slots among the top ``limit`` entries.
        """
        blocked = set(stop_tokens)
        counts = Counter(tok for tok in tokens if tok not in blocked)
        return dict(counts.most_common(limit))

    def word_cloud(self):
        """Tokenise the file, then show and save a frequency chart and a word cloud.

        The bar chart of the most frequent words is saved under the name
        '输出词频图标' and the word cloud is written to '词云图.png'; both are
        also displayed with ``plt.show()``.
        """
        # Close the file as soon as its content has been read.
        with open(self.url, 'r', encoding='utf8') as src:
            text = src.read()

        # Tokenise once and reuse the result for both the counts and the cloud.
        tokens = list(jieba.cut(text))
        # WordCloud cannot segment Chinese text by itself, so hand it the
        # jieba tokens joined into one space-separated string.
        cut_text = " ".join(tokens)

        # Tokens that must not be counted as keywords.
        # NOTE(review): many entries are empty strings here; they may be
        # punctuation marks that were lost when the file was copied -- verify
        # against the original source.
        rem = ['', '', '', '', '', '\u3000', '', '', '', '', ' ', '', '', '', '', '', '', '\n']
        d = self._top_words(tokens, rem)
        print(d)

        label = list(d)
        y = list(d.values())
        idx = np.arange(len(y))
        barh = plt.barh(idx, y)
        plt.bar_label(barh)
        # barh() centres each bar on its idx value, so the tick labels go
        # exactly there as well.
        plt.yticks(idx, label, fontproperties=font)
        plt.xlabel('出现次数', fontsize=20, labelpad=5, fontproperties=font)
        plt.ylabel('关键词', fontsize=20, labelpad=5, fontproperties=font)
        plt.savefig('输出词频图标')
        plt.show()

        wordcloud = WordCloud(
            # A font with Chinese glyphs is required, otherwise the words are
            # rendered as empty boxes; any suitable font file can be used.
            font_path="FZYTK.TTF",
            # Background colour and canvas size.
            background_color="white", width=1000, height=880).generate(cut_text)
        # Draw the cloud on a figure of its own so it never lands on top of
        # the bar chart when the previous figure is still open.
        plt.figure()
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
        wordcloud.to_file("词云图.png")
if __name__ == '__main__':
    # Ask for a root directory, let the user pick one of the files listed
    # beneath it, then build the charts for that file.
    root_dir = input('input the absolute path of dir: ')
    picked_file = ChooseDir(root_dir).choose_path()
    Myword(picked_file).word_cloud()