Update 弹幕.py

1 month ago · e6336739bb
parent 5829915597
commit e6336739bb
1 changed file with 101 additions and 2 deletions
--- a/弹幕.py
+++ b/弹幕.py
@ -68,7 +68,7 @@ def fetch_danmakus(aid):
        return []

 def get_top_videos_aids(keyword, max_videos=120):
-    """根据关键词获取综合排序前120条视频的AID"""
+    """根据关键词获取综合排序前N条视频的AID"""
    aids = []
    page = 1
    page_size = 30
@ -272,7 +272,75 @@ def write_to_excel(analysis_result, danmu_list, output_path):
    
    wb.save(output_path)
    print(f"\nExcel文件已保存至：{output_path}")
+
+# 模块5：词云可视化
+def generate_beautiful_wordcloud(danmu_list, output_path, font_path):
+    """Render a styled word cloud from danmaku text and save it as an image.
+
+    Args:
+        danmu_list: Iterable of danmaku strings; they are joined with single
+            spaces and handed to WordCloud as one text.
+        output_path: Destination path passed to ``plt.savefig``.
+        font_path: Font file used for both the cloud and the figure title.
+            NOTE(review): the title is Chinese, so this presumably has to be
+            a CJK-capable font - confirm against the caller's FONT_PATH.
+
+    Returns:
+        None. The image is written to ``output_path`` and shown on screen.
+        When ``danmu_list`` yields no text, nothing is rendered.
+    """
+    from matplotlib.font_manager import FontProperties
+
+    text = " ".join(danmu_list)
+    if not text.strip():
+        # There is nothing to lay out; skip instead of failing inside WordCloud.
+        print("没有可用的弹幕文本，已跳过词云生成")
+        return
+
+    canvas_width, canvas_height = 1200, 800
+    background = "#f8f9fa"
+
+    # Custom stop words
+    stopwords = {
+        "工具", "使用", "应用", "怎么", "如何", "领到了", "已三连",
+        "可以", "能够", "觉得", "真的", "太", "很", "非常", "一下", "一个", "什么", "哪里", "时候",
+    }
+
+    def create_rounded_rect_mask(width, height, radius=80):
+        """Return a uint8 mask: 0 inside a rounded rectangle, 255 outside.
+
+        The rectangle spans the whole canvas; only the four corners are cut
+        away along arcs of the given radius.
+        """
+        radius = max(0, min(radius, width // 2, height // 2))
+        rows, cols = np.ogrid[:height, :width]
+        # How far each pixel lies beyond the inner rectangle whose corners are
+        # the arc centres; zero along the straight edges and in the middle.
+        dx = np.maximum(np.maximum(radius - cols, cols - (width - 1 - radius)), 0)
+        dy = np.maximum(np.maximum(radius - rows, rows - (height - 1 - radius)), 0)
+        inside = dx * dx + dy * dy <= radius * radius
+        return np.where(inside, 0, 255).astype(np.uint8)
+
+    mask = create_rounded_rect_mask(canvas_width, canvas_height)
+
+    def gradient_color(word, font_size, position, orientation, random_state, **kwargs):
+        """Pick an HSL colour: hue 200-250, lighter for larger fonts."""
+        hue = 200 + random_state.randint(0, 50)
+        saturation = 70 + random_state.randint(0, 30)
+        # Cap the lightness: without a cap, large fonts push it past 100%
+        # and the biggest words fade into the light background.
+        lightness = min(40 + (font_size / 100) * 20, 70)
+        return f"hsl({hue}, {saturation}%, {int(lightness)}%)"
+
+    wc = WordCloud(
+        width=canvas_width, height=canvas_height,
+        font_path=font_path,
+        mask=mask,
+        background_color=background,
+        stopwords=stopwords,
+        max_words=300,
+        font_step=3,
+        random_state=42,
+        relative_scaling=0.6,
+        color_func=gradient_color,
+        prefer_horizontal=0.7
+    ).generate(text)
+
+    fig = plt.figure(figsize=(15, 10), facecolor=background)
+    ax = plt.gca()
+    ax.imshow(wc, interpolation="bilinear")
+    # Hide only the ticks. Switching the whole axis off would also stop the
+    # spines from being drawn, discarding the border styled below.
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+    title_font = FontProperties(fname=font_path, size=28)
+    plt.title(
+        "B站LLM相关弹幕词云分析",
+        fontproperties=title_font,
+        fontweight="bold",
+        pad=30,
+        color="#2c3e50"
+    )
+
+    # Thin light-grey frame around the cloud
+    for spine in ax.spines.values():
+        spine.set_visible(True)
+        spine.set_color("#dee2e6")
+        spine.set_linewidth(2)
+    plt.tight_layout(pad=3.0)
+    plt.savefig(
+        output_path,
+        dpi=300,
+        bbox_inches="tight",
+        facecolor=background,
+        edgecolor="none"
+    )
+    plt.show()
+    # Release the figure so repeated calls do not accumulate open figures.
+    plt.close(fig)
+    print(f"词云图已保存至：{output_path}")
+
 if __name__ == "__main__":
    print("="*50)
    print("开始B站LLM相关弹幕分析任务（AID爬取版）")
@ -321,4 +389,35 @@ if __name__ == "__main__":

    # 步骤5：写入Excel
    print("\n【步骤5/6】写入Excel文件...")
-    write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
+    write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
+    # 步骤6：生成词云图
+    print("\n【步骤6/6】生成词云可视化...")
+    generate_beautiful_wordcloud(all_cleaned_danmu, WORDCLOUD_OUTPUT, FONT_PATH)
+    
+    # Key findings: summarise the opinion statistics on the console.
+    print("\n" + "="*50)
+    print("核心结论：B站用户对大语言模型技术的主流看法")
+    print("="*50)
+    opinion_stats = analysis_result["用户看法统计"]
+
+    # 1. Cost perception. A tie falls through to the "cost high" wording.
+    cost_low = opinion_stats["应用成本低"]["提及次数"]
+    cost_high = opinion_stats["应用成本高"]["提及次数"]
+    if cost_low > cost_high:
+        print(f"1. 应用成本：{cost_low}次提及“应用成本低”，{cost_high}次提及“应用成本高”，用户普遍认为LLM门槛低、易获取")
+    else:
+        print(f"1. 应用成本：{cost_high}次提及“应用成本高”，{cost_low}次提及“应用成本低”，部分用户对付费模式有顾虑")
+
+    # 2. Potential application domains.
+    domain_count = opinion_stats["潜在应用领域"]["提及次数"]
+    print(f"2. 潜在应用领域：{domain_count}次提及，集中在办公、学习、创作、编程等高频场景，实用性认知强烈")
+
+    # 3. Pros vs. cons. The closing verdict follows the actual counts instead
+    #    of always claiming that positive feedback dominates.
+    positive = opinion_stats["正面影响"]["提及次数"]
+    negative = opinion_stats["不利影响"]["提及次数"]
+    if positive > negative:
+        impact_verdict = "整体以正面评价为主"
+    elif positive < negative:
+        impact_verdict = "整体以负面担忧为主"
+    else:
+        impact_verdict = "正负面评价基本持平"
+    print(f"3. 利弊认知：{positive}次正面反馈（高效、省事），{negative}次负面担忧（依赖、隐私泄露），{impact_verdict}")
+
+    # 4. Attitude towards the technology, likewise driven by the counts.
+    expect = opinion_stats["技术期待"]["提及次数"]
+    doubt = opinion_stats["技术质疑"]["提及次数"]
+    if expect > doubt:
+        attitude_verdict = "多数用户对技术发展持乐观态度"
+    elif expect < doubt:
+        attitude_verdict = "多数用户对技术发展持谨慎或怀疑态度"
+    else:
+        attitude_verdict = "用户对技术发展的期待与质疑大致相当"
+    print(f"4. 技术态度：{expect}次表达期待（更智能、多模态），{doubt}次提出质疑（不实用、夸大），{attitude_verdict}")
+
+    # Final summary. Kept inside the __main__ guard so that importing this
+    # module does not print a completion message.
+    print("\n任务全部完成！输出文件：")
+    print(f"- Excel分析结果：{OUTPUT_EXCEL}")
+    print(f"- 词云图：{WORDCLOUD_OUTPUT}")