"""Build a word-cloud image from a text file of bullet comments (danmaku).

The script reads one comment per line, counts whitespace-separated tokens,
renders the counts as a word cloud, saves the image and displays it.
"""
import collections
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from PIL import Image
import numpy as np


# Font used when the caller does not supply one: SimHei on Windows.
# NOTE(review): presumably chosen so that Chinese words render correctly;
# on other platforms pass a different ``font_path`` to generate_wordcloud().
DEFAULT_FONT_PATH = 'C:/Windows/Fonts/simhei.ttf'


def read_comments(file_path):
    """Read the bullet-comment file and return its lines, stripped.

    Args:
        file_path: Path of a UTF-8 encoded text file, one comment per line.

    Returns:
        A list with one string per line of the file, with leading and
        trailing whitespace (including the newline) removed. Blank lines
        are kept as empty strings.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterating the file object yields the same lines as readlines()
        # without first building an intermediate list.
        return [line.strip() for line in file]


def count_words(comments):
    """Count how often each whitespace-separated token occurs.

    Tokenisation is a plain ``str.split()``: a comment that contains no
    whitespace is counted as a single token.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A ``collections.Counter`` mapping each token to its number of
        occurrences across all comments.
    """
    word_counter = collections.Counter()
    for comment in comments:
        word_counter.update(comment.split())
    return word_counter


def generate_wordcloud(word_frequencies, output_file, mask_image=None,
                       font_path=DEFAULT_FONT_PATH):
    """Render, save and display a word cloud for the given frequencies.

    Args:
        word_frequencies: Mapping of word -> frequency, passed to
            ``WordCloud.generate_from_frequencies``.
        output_file: Path the rendered image is written to.
        mask_image: Optional mask passed to ``WordCloud`` to give the cloud
            a custom shape; ``None`` means no mask.
        font_path: Path of the font file used to draw the words. Defaults
            to ``DEFAULT_FONT_PATH`` (the previously hard-coded value).
    """
    wordcloud = WordCloud(
        width=1600,                 # higher image resolution
        height=800,                 # higher image resolution
        background_color='white',   # white background
        max_words=150,              # fewer words, to avoid overlapping text
        min_font_size=10,           # keep the smallest font readable
        relative_scaling=0.5,       # font-size scaling relative to frequency
        colormap='cool',            # cool-toned colour scheme
        mask=mask_image,            # optional custom-shape mask
        contour_width=3,            # contour width
        contour_color='steelblue',  # contour colour
        font_path=font_path,
    ).generate_from_frequencies(word_frequencies)

    # Save first: plt.show() usually blocks until the window is closed, so
    # writing the file beforehand means the image is not lost if the display
    # step fails or the process is interrupted while the window is open.
    wordcloud.to_file(output_file)
    print(f"词云图已保存至 {output_file}")

    # Display the word cloud.
    plt.figure(figsize=(12, 6))  # size of the display window
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes
    plt.show()


# Script configuration.
file_path = '弹幕.txt'  # path of the bullet-comment file
output_wordcloud_file = 'bullet_wordcloud.png'  # output image file name
mask_image = None  # optional custom shape for the cloud; None disables it


def main():
    """Read the comments, count word frequencies and generate the cloud."""
    # 1. Read all bullet comments.
    comments = read_comments(file_path)

    # 2. Count word frequencies.
    word_frequencies = count_words(comments)

    # 3. Generate, save and show the word cloud.
    generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)


# Run the pipeline only when executed as a script, so that importing this
# module does not read files or open a plot window.
if __name__ == '__main__':
    main()