From e6336739bb7965bf962aa09c1e4e0ee2daa68f6e Mon Sep 17 00:00:00 2001
From: fzu102301128 <1148297047@qq.com>
Date: Sun, 16 Nov 2025 20:37:48 +0800
Subject: [PATCH] =?UTF-8?q?Update=20=E5=BC=B9=E5=B9=95.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 弹幕.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/弹幕.py b/弹幕.py
index eda8a20..ff22c13 100644
--- a/弹幕.py
+++ b/弹幕.py
@@ -68,7 +68,7 @@ def fetch_danmakus(aid):
     return []
 
 def get_top_videos_aids(keyword, max_videos=120):
-    """Return the AIDs of the top 120 search results for the keyword (综合排序 / default sort)."""
+    """Return the AIDs of the top N search results for the keyword (综合排序 / default sort)."""
     aids = []
     page = 1
     page_size = 30
@@ -272,7 +272,75 @@ def write_to_excel(analysis_result, danmu_list, output_path):
     wb.save(output_path)
     print(f"\nExcel文件已保存至:{output_path}")
 
+
+# Module 5: word cloud visualization
+def generate_beautiful_wordcloud(danmu_list, output_path, font_path):
+    text = " ".join(danmu_list)
+    # Custom stopwords (filler words common in danmaku comments)
+    stopwords = set([
+        "工具", "使用", "应用", "怎么", "如何", "领到了", "已三连",
+        "可以", "能够", "觉得", "真的", "太", "很", "非常", "一下", "一个", "什么", "哪里", "时候"
+    ])
+
+    def create_rounded_rect_mask(width, height, radius=80):
+        mask = np.ones((height, width), dtype=np.uint8) * 255
+        for y in range(radius, height - radius):
+            for x in range(radius, width - radius):
+                mask[y, x] = 0
+        return mask
+
+    mask = create_rounded_rect_mask(1200, 800)
+
+    def gradient_color(word, font_size, position, orientation, random_state, **kwargs):
+        hue = 200 + random_state.randint(0, 50)
+        saturation = 70 + random_state.randint(0, 30)
+        lightness = 40 + (font_size / 100) * 20
+        return f"hsl({hue}, {saturation}%, {lightness}%)"
+    wc = WordCloud(
+        width=1200, height=800,
+        font_path=font_path,
+        mask=mask,
+        background_color="#f8f9fa",
+        stopwords=stopwords,
+        max_words=300,
+        font_step=3,
+        random_state=42,
+        relative_scaling=0.6,
+        color_func=gradient_color,
+        prefer_horizontal=0.7
+    ).generate(text)
+
+    plt.figure(figsize=(15, 10), facecolor="#f8f9fa")
+    ax = plt.gca()
+    ax.imshow(wc, interpolation="bilinear")
+    ax.axis("off")
+
+    from matplotlib.font_manager import FontProperties
+    title_font = FontProperties(fname=font_path, size=28)
+    plt.title(
+        "B站LLM相关弹幕词云分析",
+        fontproperties=title_font,
+        fontweight="bold",
+        pad=30,
+        color="#2c3e50"
+    )
+
+    for spine in ax.spines.values():
+        spine.set_visible(True)
+        spine.set_color("#dee2e6")
+        spine.set_linewidth(2)
+    plt.tight_layout(pad=3.0)
+    plt.savefig(
+        output_path,
+        dpi=300,
+        bbox_inches="tight",
+        facecolor="#f8f9fa",
+        edgecolor="none"
+    )
+    plt.show()
+    print(f"词云图已保存至:{output_path}")
+
 
 if __name__ == "__main__":
     print("="*50)
     print("开始B站LLM相关弹幕分析任务(AID爬取版)")
@@ -321,4 +389,35 @@ if __name__ == "__main__":
 
     # Step 5: write results to Excel
     print("\n【步骤5/6】写入Excel文件...")
-    write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
\ No newline at end of file
+    write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
+    # Step 6: generate the word cloud
+    print("\n【步骤6/6】生成词云可视化...")
+    generate_beautiful_wordcloud(all_cleaned_danmu, WORDCLOUD_OUTPUT, FONT_PATH)
+
+    # Print the key findings
+    print("\n" + "="*50)
+    print("核心结论:B站用户对大语言模型技术的主流看法")
+    print("="*50)
+    opinion_stats = analysis_result["用户看法统计"]
+
+    cost_low = opinion_stats["应用成本低"]["提及次数"]
+    cost_high = opinion_stats["应用成本高"]["提及次数"]
+    if cost_low > cost_high:
+        print(f"1. 应用成本:{cost_low}次提及“应用成本低”,{cost_high}次提及“应用成本高”,用户普遍认为LLM门槛低、易获取")
+    else:
+        print(f"1. 应用成本:{cost_high}次提及“应用成本高”,{cost_low}次提及“应用成本低”,部分用户对付费模式有顾虑")
+
+    domain_count = opinion_stats["潜在应用领域"]["提及次数"]
+    print(f"2. 潜在应用领域:{domain_count}次提及,集中在办公、学习、创作、编程等高频场景,实用性认知强烈")
+
+    positive = opinion_stats["正面影响"]["提及次数"]
+    negative = opinion_stats["不利影响"]["提及次数"]
+    print(f"3. 利弊认知:{positive}次正面反馈(高效、省事),{negative}次负面担忧(依赖、隐私泄露),整体以正面评价为主")
+
+    expect = opinion_stats["技术期待"]["提及次数"]
+    doubt = opinion_stats["技术质疑"]["提及次数"]
+    print(f"4. 技术态度:{expect}次表达期待(更智能、多模态),{doubt}次提出质疑(不实用、夸大),多数用户对技术发展持乐观态度")
+
+    print("\n任务全部完成!输出文件:")
+    print(f"- Excel分析结果:{OUTPUT_EXCEL}")
+    print(f"- 词云图:{WORDCLOUD_OUTPUT}")
\ No newline at end of file
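
The hunks above rely on names that are defined near the top of 弹幕.py and therefore do not appear in this diff: np, plt, WordCloud, OUTPUT_EXCEL, WORDCLOUD_OUTPUT and FONT_PATH. A minimal sketch of that assumed module-level setup follows; the paths and font below are placeholders for illustration, not the project's actual values:

import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Placeholder values for illustration only -- the real constants live in 弹幕.py.
OUTPUT_EXCEL = "bilibili_llm_danmaku_analysis.xlsx"   # Excel report written in step 5
WORDCLOUD_OUTPUT = "bilibili_llm_wordcloud.png"       # image written by generate_beautiful_wordcloud
FONT_PATH = "C:/Windows/Fonts/msyh.ttc"               # any font with CJK coverage works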
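
A note on create_rounded_rect_mask: WordCloud treats pure-white (255) mask pixels as masked out and draws words on all other pixels, so the helper above actually produces a plain rectangle with an 80 px border rather than rounded corners. The per-pixel Python loop also does what a single slice assignment (mask[radius:height - radius, radius:width - radius] = 0) would do. If rounded corners are genuinely wanted, a vectorized sketch along the following lines could replace the helper. It keeps the same call signature and the same 255 = masked / 0 = drawable convention, but it is an illustration, not part of this patch:

import numpy as np

def create_rounded_rect_mask(width, height, radius=80):
    """Full-canvas rectangular mask whose four corners are rounded with `radius`.

    255 = masked out (no words), 0 = drawable, matching wordcloud's mask convention.
    """
    yy, xx = np.ogrid[:height, :width]          # row (yy) and column (xx) coordinate grids
    left, right = radius, width - radius - 1    # centres of the corner arcs
    top, bottom = radius, height - radius - 1

    # Two overlapping bands cover everything except the four corner squares.
    band_h = (yy >= top) & (yy <= bottom)       # full width, vertically inset by radius
    band_v = (xx >= left) & (xx <= right)       # full height, horizontally inset by radius

    # Quarter circles fill the corners back in up to `radius`.
    corners = np.zeros((height, width), dtype=bool)
    for cy in (top, bottom):
        for cx in (left, right):
            corners |= (xx - cx) ** 2 + (yy - cy) ** 2 <= radius ** 2

    mask = np.full((height, width), 255, dtype=np.uint8)
    mask[band_h | band_v | corners] = 0
    return mask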