"""Build a word cloud image from a text file of bullet-screen comments.

The input file is expected to hold one comment per line. Words are counted
by whitespace splitting, rendered with the ``wordcloud`` package, saved to a
PNG file, and shown in a matplotlib window.
"""

import collections

import matplotlib.pyplot as plt
from wordcloud import WordCloud
# NOTE(review): Image and np are not referenced below; they are kept from the
# original import list (presumably for building a mask array, e.g.
# ``np.array(Image.open(path))`` -- confirm before removing).
from PIL import Image
import numpy as np


# SimHei font shipped with Windows. A font that contains CJK glyphs is needed
# to draw Chinese words; on other platforms pass a different ``font_path``.
DEFAULT_FONT_PATH = 'C:/Windows/Fonts/simhei.ttf'


def read_comments(file_path: str) -> list:
    """Read the comment file and return its lines with whitespace stripped.

    Args:
        file_path: Path of a UTF-8 encoded text file, one comment per line.

    Returns:
        A list of strings, one per line of the file, each with leading and
        trailing whitespace (including the newline) removed. Blank lines are
        kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return [line.strip() for line in file]


def count_words(comments) -> collections.Counter:
    """Count how often each whitespace-separated token occurs.

    Tokenisation is a plain ``str.split()``, so text written without spaces
    (typical for Chinese) is counted as one token per run of non-whitespace
    characters. Empty comments contribute nothing.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A ``collections.Counter`` mapping each token to its number of
        occurrences across all comments.
    """
    word_counter = collections.Counter()
    for comment in comments:
        word_counter.update(comment.split())
    return word_counter


def generate_wordcloud(word_frequencies, output_file, mask_image=None,
                       font_path: str = DEFAULT_FONT_PATH) -> None:
    """Render a word cloud, save it to ``output_file`` and display it.

    The image is written to disk *before* the matplotlib window is opened, so
    the file is produced even when no display is available or the window is
    left open.

    Args:
        word_frequencies: Mapping of word to frequency (for example the
            ``Counter`` returned by ``count_words``).
        output_file: Path of the image file to write.
        mask_image: Optional array passed to ``WordCloud`` as ``mask`` to give
            the cloud a custom shape; ``None`` draws a plain rectangle.
        font_path: Path of the font file used for drawing. Defaults to the
            Windows SimHei font.

    Raises:
        ValueError: If ``word_frequencies`` is empty.
        OSError: If the font file cannot be opened or the image cannot be
            written.
    """
    if not word_frequencies:
        # WordCloud would raise ValueError as well; fail early with a message
        # that points at the actual cause.
        raise ValueError(
            "word_frequencies is empty; there are no words to draw")

    wordcloud = WordCloud(
        width=1600,                 # canvas width in pixels
        height=800,                 # canvas height in pixels
        background_color='white',   # white background
        max_words=150,              # cap the word count to limit clutter
        min_font_size=10,           # keep the smallest words legible
        relative_scaling=0.5,       # how strongly frequency drives font size
        colormap='cool',            # cool-toned matplotlib colormap
        mask=mask_image,            # optional custom shape
        contour_width=3,            # outline width; only drawn with a mask
        contour_color='steelblue',  # outline colour; only drawn with a mask
        font_path=font_path,
    ).generate_from_frequencies(word_frequencies)

    # Save first: plt.show() may block or fail on a machine without a display.
    wordcloud.to_file(output_file)
    print(f"词云图已保存至 {output_file}")

    # Show the rendered cloud without axes.
    plt.figure(figsize=(12, 6))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def main() -> None:
    """Read the comment file, count words and produce the word cloud."""
    file_path = '弹幕.txt'                          # input comment file
    output_wordcloud_file = 'bullet_wordcloud.png'  # output image file

    # 1. Read all comments.
    comments = read_comments(file_path)

    # 2. Count word frequencies.
    word_frequencies = count_words(comments)

    # 3. (Optional) custom shape for the cloud; None means no custom shape.
    mask_image = None

    # 4. Generate, save and display the word cloud.
    generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)


if __name__ == '__main__':
    main()