# Reconstructed from a patch that adds barrage/wordCloud.py as a new file
# (mode 100644, index 0000000..233fb9a).
'''
Generate a word-cloud image from a text file.

Settings used by main():

filePath     path of the text file that supplies the words
maskImgPath  path of the mask image; set to '' when there is no mask
saveImgPath  path the generated image is saved to
width        horizontal size of the generated image
height       vertical size of the generated image
save         whether the image is saved
'''

import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer


def ReadAndCutWords(filePath):
    '''Read a UTF-8 text file and segment its content with jieba.

    filePath: path of the text file to read.

    Returns one string holding the segmented words separated by single
    spaces, which is the whitespace-delimited form ChangeToFreq() feeds to
    the vectorizer.
    '''
    with open(filePath, 'r', encoding='utf-8') as file:
        text = file.read()
    # cut_all=False is passed explicitly to request jieba's non-"full" mode.
    words = jieba.cut(text, cut_all=False)
    return ' '.join(words)


def ChangeToFreq(word_list):
    '''Turn a space-separated word string into a {word: weight} mapping.

    word_list: string of words separated by spaces, as returned by
        ReadAndCutWords().

    Returns a dict mapping each vectorizer feature name to its TF-IDF weight.
    The whole text is fitted as a single document, so the column sum below
    is simply that document's row.

    NOTE(review): TfidfVectorizer is used with its default settings, whose
    token pattern only keeps tokens of two or more word characters, so
    single-character words never reach the word cloud -- confirm that this
    is intended.
    '''
    documents = [word_list]
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(documents)
    feature_names = vectorizer.get_feature_names_out()
    weights = tfidf_matrix.toarray().sum(axis=0)
    return dict(zip(feature_names, weights))


def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath,
                    save=False, fontPath='simhei.ttf'):
    '''Render a word cloud, optionally save it, and display it.

    word_freq: mapping of word -> weight, e.g. the result of ChangeToFreq().
    width, height: size of the generated image; per the wordcloud
        documentation these are ignored when a mask is supplied, in which
        case the mask's shape is used instead.
    maskImgPath: path of the mask image; '' (or None) means no mask.
    saveImgPath: path the image is written to when save is true.
    save: whether to write the image to saveImgPath.
    fontPath: font file used to draw the words; defaults to 'simhei.ttf',
        the font this function has always used.
    '''
    mask = None
    if maskImgPath:
        # Close the image file deterministically, and normalise to RGB so
        # that palette / grayscale masks still give ImageColorGenerator the
        # 3-channel array it needs (for RGBA input this only drops alpha).
        with Image.open(maskImgPath) as maskImg:
            mask = np.array(maskImg.convert('RGB'))

    wordcloud = WordCloud(
        font_path=fontPath,
        mask=mask,
        width=width,
        height=height,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    if mask is not None:
        # Colour every word from the mask pixels underneath it.
        wordcloud.recolor(color_func=ImageColorGenerator(mask))

    # Save before showing: plt.show() can block until the window is closed,
    # and the file should not depend on the display step completing.
    if save:
        wordcloud.to_file(saveImgPath)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def main():
    '''Build the word cloud for ./docs/allBarrage.txt using the bundled mask.'''
    filePath = './docs/allBarrage.txt'
    maskImgPath = './docs/maskImg.png'
    saveImgPath = './docs/wordCloud.png'
    width = 1920
    height = 1440
    isSave = True

    word_list = ReadAndCutWords(filePath)
    word_freq = ChangeToFreq(word_list)
    CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, isSave)


if __name__ == '__main__':
    main()

# The same patch also adds the binary image docs/wordCloud.png
# (new file, mode 100644, index 0000000..27c6625).