From 2b5fec9b1791929fc1606a65c6aa2cc3ce4e119b Mon Sep 17 00:00:00 2001
From: pc7si35ku <282589624@qq.com>
Date: Tue, 17 Sep 2024 23:11:03 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=BB=9F=E8=AE=A1AI=E6=8A=80?=
 =?UTF-8?q?=E6=9C=AF=E5=BA=94=E7=94=A8=E6=96=B9=E9=9D=A2=E7=9A=84=E6=AF=8F?=
 =?UTF-8?q?=E7=A7=8D=E5=BC=B9=E5=B9=95=E6=95=B0=E9=87=8F=EF=BC=8C=E5=B9=B6?=
 =?UTF-8?q?=E8=BE=93=E5=87=BA=E6=95=B0=E9=87=8F=E6=8E=92=E5=90=8D=E5=89=8D?=
 =?UTF-8?q?8=E7=9A=84=E5=BC=B9=E5=B9=95=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 generate_bullet_wordcloud.py | 68 ++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 generate_bullet_wordcloud.py

diff --git a/generate_bullet_wordcloud.py b/generate_bullet_wordcloud.py
new file mode 100644
index 0000000..2bce63d
--- /dev/null
+++ b/generate_bullet_wordcloud.py
@@ -0,0 +1,68 @@
+import collections
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+from PIL import Image
+import numpy as np
+
+
+# Read the bullet-comment (danmaku) file as UTF-8 text and return its lines as a list of strings.
+def read_comments(file_path):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        comments = file.readlines()
+
+    # Strip leading/trailing whitespace (including the newline) from each comment; blank lines are kept as ''.
+    cleaned_comments = [comment.strip() for comment in comments]
+    return cleaned_comments
+
+
+# Count token frequencies over all comments and return them as a collections.Counter.
+def count_words(comments):
+    word_counter = collections.Counter()
+    for comment in comments:
+        words = comment.split()  # Naive tokenization: split on whitespace only, so a comment without spaces counts as one token
+        word_counter.update(words)
+    return word_counter
+
+
+# Build a word cloud from a {word: frequency} mapping, display it with matplotlib, then save it to output_file.
+def generate_wordcloud(word_frequencies, output_file, mask_image=None):
+    wordcloud = WordCloud(
+        width=1600,  # Wide canvas for a higher-resolution image
+        height=800,  # Tall canvas for a higher-resolution image
+        background_color='white',  # White background
+        max_words=150,  # Cap the number of words to keep the layout from getting crowded
+        min_font_size=10,  # Keep the smallest words from becoming too small
+        relative_scaling=0.5,  # Tune how strongly font size follows word frequency
+        colormap='cool',  # Use the cool-toned 'cool' colormap
+        mask=mask_image,  # Optional: custom-shape mask; None means no mask
+        contour_width=3,  # Contour (outline) width
+        contour_color='steelblue',  # Contour (outline) color
+        font_path='C:/Windows/Fonts/simhei.ttf'  # SimHei font on Windows. NOTE(review): hard-coded Windows path; presumably unavailable on other OSes
+    ).generate_from_frequencies(word_frequencies)
+
+    # Display the word cloud. NOTE(review): plt.show() runs before the save below and may block on interactive backends - confirm.
+    plt.figure(figsize=(12, 6))  # Size of the display figure
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis('off')  # Hide the axes
+    plt.show()
+
+    # Save the word cloud image to output_file and report the path on stdout
+    wordcloud.to_file(output_file)
+    print(f"词云图已保存至 {output_file}")
+
+
+# Script body: read the file, count frequencies, generate the word cloud. NOTE(review): runs at import time (no __main__ guard).
+file_path = '弹幕.txt'  # Path of the bullet-comment input file
+output_wordcloud_file = 'bullet_wordcloud.png'  # File name of the generated word cloud image
+
+# 1. Read all bullet comments
+comments = read_comments(file_path)
+
+# 2. Count word frequencies
+word_frequencies = count_words(comments)
+
+# 3. (Optional) use a custom image as the word cloud shape
+mask_image = None  # Leave as None when no custom shape is used
+
+# 4. Generate, display and save the word cloud
+generate_wordcloud(word_frequencies, output_wordcloud_file, mask_image)