"""Build a word-cloud image from a text file of bullet comments (danmaku).

The script reads one comment per line, counts whitespace-separated tokens
and renders the resulting frequencies with the ``wordcloud`` package.

Provenance: extracted from patch 2b5fec9b (dated 2024-09-17), which adds
this file as ``generate_bullet_wordcloud.py``.
"""

import collections

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud

# SimHei (a CJK-capable font) at its usual Windows location. WordCloud needs
# a font that contains the glyphs being drawn; pass ``font_path=`` to
# ``generate_wordcloud`` on systems where this file does not exist.
DEFAULT_FONT_PATH = 'C:/Windows/Fonts/simhei.ttf'


def read_comments(file_path):
    """Return the comments stored in *file_path*, one per line.

    Args:
        file_path: Path of a UTF-8 text file containing one comment per line.

    Returns:
        A list of strings with leading/trailing whitespace (including the
        line terminator) removed. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also swallows a leading
    # byte-order mark; with plain 'utf-8' the BOM would stay attached to the
    # first comment because str.strip() does not treat U+FEFF as whitespace.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [line.strip() for line in file]


def count_words(comments):
    """Count how often each whitespace-separated token occurs.

    Args:
        comments: Iterable of comment strings.

    Returns:
        A ``collections.Counter`` mapping token -> number of occurrences.

    Note:
        Tokenisation is a plain ``str.split()``, so a comment without any
        whitespace is counted as a single token.
    """
    return collections.Counter(
        word for comment in comments for word in comment.split()
    )


def generate_wordcloud(word_frequencies, output_file, mask_image=None, *,
                       font_path=DEFAULT_FONT_PATH, show=True):
    """Render *word_frequencies* as a word cloud, save it and optionally show it.

    Args:
        word_frequencies: Mapping of word -> frequency.
        output_file: Path of the image file to write.
        mask_image: Optional mask passed straight to ``WordCloud(mask=...)``
            to give the cloud a custom shape; ``None`` disables masking.
        font_path: Font file used to draw the words. Defaults to SimHei at
            its Windows location, matching the script's original behaviour.
        show: When true (the default), display the image in a matplotlib
            window after saving it.
    """
    wordcloud = WordCloud(
        width=1600,                 # canvas width in pixels
        height=800,                 # canvas height in pixels
        background_color='white',
        max_words=150,              # cap the word count to limit overlap
        min_font_size=10,           # keep the smallest words legible
        relative_scaling=0.5,       # how strongly frequency drives font size
        colormap='cool',            # cool-toned matplotlib colormap
        mask=mask_image,
        contour_width=3,
        contour_color='steelblue',
        font_path=font_path,
    ).generate_from_frequencies(word_frequencies)

    # Save before showing: plt.show() may block until the window is closed,
    # and the image should already be on disk by then.
    wordcloud.to_file(output_file)
    print(f"词云图已保存至 {output_file}")

    if show:
        plt.figure(figsize=(12, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')  # hide the axes; only the image matters
        plt.show()


def main():
    """Read the comment file, count words and produce the word cloud."""
    file_path = '弹幕.txt'                          # input: one comment per line
    output_wordcloud_file = 'bullet_wordcloud.png'  # output image file

    # 1. Read every comment.
    comments = read_comments(file_path)

    # 2. Count word frequencies.
    word_frequencies = count_words(comments)

    # 3. Optional custom shape for the cloud; None means no mask.
    #    NOTE(review): the PIL / numpy imports are presumably intended for
    #    building such a mask from an image file - confirm before removing.
    mask_image = None

    # 4. Render, save and display the word cloud.
    generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)


if __name__ == "__main__":
    main()