Update Scraping Bilibili Danmaku (bullet comments).py

10 months ago · 29677d320b
parent fc3864a0e0
commit 29677d320b
1 changed file with 300 additions and 286 deletions
--- a/Scraping Bilibili Danmaku (bullet comments).py
+++ b/Scraping Bilibili Danmaku (bullet comments).py
@@ -186,7 +186,7 @@ def analyze_keywords_in_comments(comments_file, keywords_file, output_excel_file



-def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='msyh.ttc'):
+def generate_wordcloud(text_file, stopwords_file, output_image_file, mask_image_file=None, font_path='msyh.ttc'):
    # 加载停用词
    def load_stopwords(file_path):
        with open(file_path, encoding='utf-8') as f:
@@ -207,15 +207,29 @@ def generate_wordcloud(text_file, stopwords_file, output_image_file, font_path='
    # 将处理后的词汇拼接成字符串
    word_string = ' '.join(filtered_words)

-    # 生成词云
-    wc = wordcloud.WordCloud(
-        width=700,
-        height=700,
-        background_color='white',
-        font_path=font_path
-    )
-    wc.generate(word_string)
+    # 设置词云参数
+    wc_kwargs = {
+        'width': 700,
+        'height': 700,
+        'background_color': 'white',
+        'font_path': font_path,
+    }
+
+    if mask_image_file:
+        # Load the mask image as 8-bit grayscale (mode 'L'); this step alone does not make it binary
+        mask_image = np.array(Image.open(mask_image_file).convert('L'))

+        # Binarize the mask (no inversion): pure-black pixels (0) stay 0, every other value becomes 255
+        mask = np.where(mask_image == 0, 0, 255).astype(np.uint8)
+
+        wc_kwargs.update({
+            'mask': mask,  # 应用掩模图像
+            'contour_color': 'white',  # 轮廓颜色
+            'contour_width': 0,
+        })
+
+    wc = wordcloud.WordCloud(**wc_kwargs)
+    wc.generate(word_string)
    # 保存词云图
    wc.to_file(output_image_file)

@@ -259,7 +273,7 @@ def main():
    analyze_keywords_in_comments('cleaned_comment.txt', 'keywords.txt', 'ai_technologies_count.xlsx')
    #输出词云图
    print("开始构建词云图")
-    generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png')
+    generate_wordcloud('cleaned_comment.txt', 'stopwords.txt', '词云.png', 'img.png')
    print("构建词云图完毕")
 if __name__ == "__main__":
    main()