from collections import Counter

from wordcloud import WordCloud
import matplotlib.pyplot as plt  # 绘制图像的模块
import jieba  # jieba分词
import numpy as np
from pylab import mpl
from os import listdir
from os.path import join, isfile, isdir

from matplotlib.font_manager import FontProperties
# Font handed to matplotlib for the chart's tick labels and axis titles.
# It is loaded from the file FZYTK.TTF, given here as a relative path.
font = FontProperties(fname=r"FZYTK.TTF", size=14)

class ChooseDir:
    """Recursively list the files under a directory and let the user pick one.

    On construction every regular file found below ``director`` is printed
    with a 1-based number; ``choose_path`` then asks the user for one of
    those numbers and returns the matching path.
    """

    def __init__(self, director):
        """Scan ``director`` and print the numbered list of files.

        Args:
            director: Path of the directory to walk.
        """
        self.__count = 0
        self.__path = []
        self.__listDir(director)

    def __listDir(self, director):
        """Collect (and print) every file below ``director``, depth first."""
        # listdir() returns entries in arbitrary order; sorting keeps the
        # printed numbering stable from one run to the next.
        for subPath in sorted(listdir(director)):
            path = join(director, subPath)
            if isfile(path):
                self.__count += 1
                print(self.__count, '---', path)
                self.__path.append(path)
            elif isdir(path):
                self.__listDir(path)

    def choose_path(self):
        """Prompt for one of the printed numbers and return the chosen path.

        Input that is not an integer, or that lies outside ``1..count``, is
        rejected and the prompt is repeated.  In particular ``0`` and
        negative numbers are no longer accepted: they used to select a file
        from the end of the list through negative indexing.

        Returns:
            The path of the selected file.

        Raises:
            IndexError: If no files were found, so there is nothing to pick.
        """
        total = len(self.__path)
        if total == 0:
            raise IndexError('no files were found to choose from')

        while True:
            answer = input('Select a path: ')
            try:
                number = int(answer)
            except ValueError:
                print('please enter a number between 1 and', total)
                continue
            if 1 <= number <= total:
                break
            print('please enter a number between 1 and', total)

        selected_path = self.__path[number - 1]
        print('the path you selecting: ', selected_path)
        return selected_path


class Myword:
    """Draw a word-frequency bar chart and a word cloud for one text file."""

    # Tokens kept out of the frequency chart: punctuation, whitespace and a
    # handful of very common single-character words.
    _STOP_WORDS = frozenset([
        '，', '、', '。', '的', '和', '\u3000', '图', '串', '“', '”', ' ',
        '与', '是', '端', '在', '中', '了', '\n',
    ])

    def __init__(self, url):
        """Store the path of the text file to analyse.

        Args:
            url: Path of the file; it is read as UTF-8 text.
        """
        self.url = url

    @classmethod
    def _top_words(cls, tokens, limit=20):
        """Return the ``limit`` most frequent non-stop-word tokens.

        Stop words are dropped *before* ranking, so they cannot use up
        slots among the top ``limit`` entries.

        Args:
            tokens: Iterable of token strings.
            limit: Maximum number of entries to return.

        Returns:
            A dict mapping token to count, most frequent first.
        """
        counts = Counter(tok for tok in tokens if tok not in cls._STOP_WORDS)
        return dict(counts.most_common(limit))

    def word_cloud(self):
        """Read the file, then show and save both figures.

        The bar chart is saved under the name '输出词频图标' (no extension
        is given, so matplotlib picks the format) and the word cloud is
        written to '词云图.png'.  Both figures are also displayed with
        ``plt.show()``.
        """
        # Close the file deterministically instead of leaking the handle.
        with open(self.url, 'r', encoding='utf8') as fh:
            text = fh.read()

        # Segment once with jieba and reuse the tokens for both outputs.
        # WordCloud cannot split Chinese text correctly by itself, so it is
        # given the tokens joined by spaces.
        tokens = list(jieba.cut(text))
        cut_text = " ".join(tokens)

        d = self._top_words(tokens, 20)
        print(d)

        label = list(d)
        y = list(d.values())
        idx = np.arange(len(y))
        barh = plt.barh(idx, y)
        plt.bar_label(barh)
        # barh() centres each bar on its y position, so the tick labels go
        # at idx itself rather than at an offset.
        plt.yticks(idx, label, fontproperties=font)
        plt.xlabel('出现次数', fontsize=20, labelpad=5, fontproperties=font)
        plt.ylabel('关键词', fontsize=20, labelpad=5, fontproperties=font)
        plt.savefig('输出词频图标')
        plt.show()

        wordcloud = WordCloud(
            # A font containing CJK glyphs is needed, otherwise the words
            # are drawn as empty boxes; any suitable font file can be used.
            font_path="FZYTK.TTF",
            # Background colour and canvas size in pixels.
            background_color="white", width=1000, height=880).generate(cut_text)

        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
        wordcloud.to_file("词云图.png")


if __name__ == '__main__':
    # Ask for a root directory, let the user pick one of the files listed
    # beneath it, then draw the charts for that file.
    root_dir = input('input the absolute path of dir: ')
    chosen_file = ChooseDir(root_dir).choose_path()
    Myword(chosen_file).word_cloud()