|
|
|
|
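'''
Generate a word cloud image.

filePath: path of the text file containing the words for the word cloud
maskImgPath: path of the mask image; set to '' if there is no mask image
saveImgPath: path where the generated image is saved
width: horizontal size of the generated image
height: vertical size of the generated image
save: whether to save the image
'''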
|
|
|
|
|
|
|
|
|
|
import jieba
|
|
|
|
|
import numpy as np
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from PIL import Image
|
|
|
|
|
from wordcloud import WordCloud, ImageColorGenerator
|
|
|
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
|
|
|
|
|
|
|
# Split the bullet-comment (danmaku) text into words that are easy to process
|
|
|
|
|
def ReadAndCutWords(filePath):
    """Read a UTF-8 text file and segment its contents with jieba.

    Args:
        filePath: Path of the text file to read.

    Returns:
        One string containing every token produced by ``jieba.cut``,
        joined with single spaces.
    """
    with open(filePath, mode='r', encoding='utf-8') as source:
        raw_text = source.read()

    # cut_all=False is passed explicitly, so jieba does not use its
    # "full" segmentation mode here.
    tokens = jieba.cut(raw_text, cut_all=False)
    return ' '.join(tokens)
|
|
|
|
|
|
|
|
|
|
# Use TF-IDF to weight the words by frequency
|
|
|
|
|
def ChangeToFreq(word_list):
    """Map each term of a space-separated string to its TF-IDF weight.

    The whole string is handed to ``TfidfVectorizer`` as a single document,
    so the matrix it produces has exactly one row.

    Args:
        word_list: Space-separated tokens (as produced by ReadAndCutWords).

    Returns:
        A dict mapping every feature name reported by the vectorizer to the
        column sum of the TF-IDF matrix for that feature.
    """
    tfidf = TfidfVectorizer()
    # Fit on a one-document corpus, then collapse the rows into one weight
    # per vocabulary entry.
    weights = tfidf.fit_transform([word_list]).toarray().sum(axis=0)
    vocabulary = tfidf.get_feature_names_out()
    return {term: weight for term, weight in zip(vocabulary, weights)}
|
|
|
|
|
|
|
|
|
|
# Generate the word cloud from the word frequencies
|
|
|
|
|
def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, save=False):
    """Render a word cloud from word weights, display it, and optionally save it.

    Args:
        word_freq: Mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        width: Horizontal size of the generated image.
        height: Vertical size of the generated image.
        maskImgPath: Path of the mask image. An empty string (or ``None``)
            means no mask is used.
        saveImgPath: Path the image is written to; only used when ``save``
            is true.
        save: Whether to write the rendered image to ``saveImgPath``.

    Side effects:
        Draws the cloud on the current matplotlib figure and calls
        ``plt.show()``; writes ``saveImgPath`` afterwards when ``save`` is true.
    """
    mask = None
    if maskImgPath:
        # Open the mask in a context manager so the underlying file handle
        # is released as soon as the pixels are copied into the array.
        with Image.open(maskImgPath) as mask_image:
            mask = np.array(mask_image)

    # NOTE(review): the font file is hard-coded; presumably chosen for CJK
    # glyph coverage -- it must be resolvable at runtime.
    # Per the wordcloud documentation, width/height are ignored when a mask
    # is supplied and the mask's shape is used instead.
    wordcloud = WordCloud(
        font_path='simhei.ttf',
        mask=mask,
        width=width,
        height=height,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    if mask is not None:
        # Recolor the words using colors sampled from the mask image.
        image_colors = ImageColorGenerator(mask)
        wordcloud.recolor(color_func=image_colors)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    if save:
        wordcloud.to_file(saveImgPath)
|
|
|
|
|
|
|
|
|
|
def main():
    """Build and save a word cloud from ./docs/allBarrage.txt.

    Uses fixed input, mask and output paths and a fixed 1920x1440 size,
    then runs the read -> weight -> render pipeline.
    """
    source_path = './docs/allBarrage.txt'
    mask_path = './docs/maskImg.png'
    output_path = './docs/wordCloud.png'

    frequencies = ChangeToFreq(ReadAndCutWords(source_path))

    canvas_width, canvas_height = 1920, 1440
    should_save = True
    CreateWordCloud(
        frequencies,
        canvas_width,
        canvas_height,
        mask_path,
        output_path,
        should_save,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
|