"""Build a word cloud image from a text file of bullet comments (danmaku).

The input file is expected to hold one comment per line. Tokens are counted
with a plain whitespace split and the resulting frequencies are rendered with
the ``wordcloud`` package.
"""
import collections
from typing import Iterable, List, Optional

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image  # not used below; kept for loading an optional mask image
from wordcloud import WordCloud

# SimHei ("black body") font at its usual Windows location. Rendering Chinese
# text needs a font that contains CJK glyphs; pass a different ``font_path``
# to ``generate_wordcloud`` on systems where this file does not exist.
DEFAULT_FONT_PATH = 'C:/Windows/Fonts/simhei.ttf'


def read_comments(file_path: str) -> List[str]:
    """Read the bullet-comment file and return one stripped string per line.

    Args:
        file_path: Path of a UTF-8 encoded text file, one comment per line.

    Returns:
        Every line of the file with surrounding whitespace (including the
        trailing newline) removed. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading
    # byte-order mark, which would otherwise end up glued to the first token.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [line.strip() for line in file]


def count_words(comments: Iterable[str]) -> collections.Counter:
    """Count how often each whitespace-separated token occurs.

    Tokenisation is a plain ``str.split()``: a comment that contains no
    whitespace (typical for unsegmented Chinese text) is counted as a single
    token. Empty comments contribute nothing.

    Args:
        comments: Comment strings, e.g. the result of ``read_comments``.

    Returns:
        A ``collections.Counter`` mapping each token to its occurrence count.
    """
    word_counter = collections.Counter()
    for comment in comments:
        word_counter.update(comment.split())
    return word_counter


def generate_wordcloud(word_frequencies, output_file, mask_image=None,
                       font_path=DEFAULT_FONT_PATH):
    """Render a word cloud, save it to ``output_file`` and display it.

    Args:
        word_frequencies: Mapping of token -> frequency (e.g. a ``Counter``).
        output_file: Path of the image file to write.
        mask_image: Optional mask handed to ``WordCloud(mask=...)`` to give
            the cloud a custom shape; ``None`` for the default rectangle.
            (Presumably a numpy array such as ``np.array(Image.open(path))``
            -- confirm against the wordcloud documentation.)
        font_path: Font file used to draw the words. Defaults to SimHei at
            its Windows location.

    Raises:
        ValueError: If ``word_frequencies`` is empty.
    """
    if not word_frequencies:
        # wordcloud itself raises ValueError for an empty mapping; fail early
        # with a message that points at the likely cause.
        raise ValueError(
            "word_frequencies is empty; there is nothing to draw "
            "(is the input file empty?)"
        )

    wordcloud = WordCloud(
        width=1600,                  # higher image resolution
        height=800,                  # higher image resolution
        background_color='white',    # white background
        max_words=150,               # cap the word count to limit overlap
        min_font_size=10,            # keep the smallest words legible
        relative_scaling=0.5,        # how strongly frequency drives font size
        colormap='cool',             # cool-toned colour scheme
        mask=mask_image,             # optional custom shape
        contour_width=3,             # outline width
        contour_color='steelblue',   # outline colour
        font_path=font_path,
    ).generate_from_frequencies(word_frequencies)

    # Save before showing: plt.show() blocks with interactive backends, and
    # the image should not be lost if displaying fails or is interrupted.
    wordcloud.to_file(output_file)
    print(f"词云图已保存至 {output_file}")

    # Display the word cloud.
    plt.figure(figsize=(12, 6))  # size of the display window
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes
    plt.show()


# Script entry point: read the file, count tokens, build the word cloud.
if __name__ == "__main__":
    file_path = '弹幕.txt'  # path of the bullet-comment file
    output_wordcloud_file = 'bullet_wordcloud.png'  # output image file name

    # 1. Read all comments.
    comments = read_comments(file_path)

    # 2. Count token frequencies.
    word_frequencies = count_words(comments)

    # 3. (Optional) use a custom image as the cloud shape; None disables it.
    mask_image = None

    # 4. Generate, save and display the word cloud.
    generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)