Update 弹幕.py

main
fzu102301128 1 month ago
parent 5829915597
commit e6336739bb

@ -68,7 +68,7 @@ def fetch_danmakus(aid):
return []
def get_top_videos_aids(keyword, max_videos=120):
"""根据关键词获取综合排序前120条视频的AID"""
"""根据关键词获取综合排序前N条视频的AID"""
aids = []
page = 1
page_size = 30
@ -272,7 +272,75 @@ def write_to_excel(analysis_result, danmu_list, output_path):
wb.save(output_path)
print(f"\nExcel文件已保存至{output_path}")
# 模块5词云可视化
def generate_beautiful_wordcloud(danmu_list, output_path, font_path):
    """Render a styled word cloud from danmaku text and save it as an image.

    The danmaku strings are joined with single spaces and passed to
    ``WordCloud.generate``. The cloud is drawn with matplotlib under a fixed
    title, written to ``output_path`` at 300 dpi and then displayed with
    ``plt.show()``.

    Args:
        danmu_list: Iterable of danmaku strings.
        output_path: Path the rendered figure is saved to.
        font_path: Path to a font file; used for the cloud and for the title.

    Returns:
        None. When the joined text is empty or whitespace only, a message is
        printed and nothing is rendered or saved.
    """
    text = " ".join(danmu_list)
    if not text.strip():
        # WordCloud.generate cannot lay out zero words; skip rendering
        # instead of aborting the rest of the pipeline.
        print("弹幕列表为空,跳过词云生成")
        return

    # Custom stop words that should never appear in the cloud.
    stopwords = {
        "工具", "使用", "应用", "怎么", "如何", "领到了", "已三连",
        "可以", "能够", "觉得", "真的", "", "非常", "一下", "一个",
        "什么", "哪里", "时候",
    }

    def create_rounded_rect_mask(width, height, radius=80):
        """Return a uint8 mask: 255 on a border of ``radius`` pixels, 0 inside.

        Despite the name the inner region is a plain rectangle (square
        corners). The wordcloud documentation treats pure-white mask entries
        as masked out, so words are confined to the inner rectangle.
        """
        mask = np.full((height, width), 255, dtype=np.uint8)
        # A single slice assignment replaces the per-pixel Python loop.
        mask[radius:height - radius, radius:width - radius] = 0
        return mask

    mask = create_rounded_rect_mask(1200, 800)

    def gradient_color(word, font_size, position, orientation, random_state, **kwargs):
        """Return an ``hsl(...)`` colour string: random hue offset from 200, lighter for bigger fonts."""
        hue = 200 + random_state.randint(0, 50)
        saturation = 70 + random_state.randint(0, 30)
        # Lightness is a percentage; font sizes above 300 would otherwise
        # push it past 100%, so clamp it.
        lightness = min(40 + (font_size / 100) * 20, 100)
        return f"hsl({hue}, {saturation}%, {lightness}%)"

    wc = WordCloud(
        width=1200,
        height=800,
        font_path=font_path,
        mask=mask,
        background_color="#f8f9fa",
        stopwords=stopwords,
        max_words=300,
        font_step=3,
        random_state=42,  # fixed seed passed to WordCloud
        relative_scaling=0.6,
        color_func=gradient_color,
        prefer_horizontal=0.7,
    ).generate(text)

    plt.figure(figsize=(15, 10), facecolor="#f8f9fa")
    ax = plt.gca()
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")

    # Build the title font from the same font file as the cloud.
    from matplotlib.font_manager import FontProperties
    title_font = FontProperties(fname=font_path, size=28)
    plt.title(
        "B站LLM相关弹幕词云分析",
        fontproperties=title_font,
        fontweight="bold",
        pad=30,
        color="#2c3e50",
    )

    # Style the axes spines as a light grey frame.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_color("#dee2e6")
        spine.set_linewidth(2)

    plt.tight_layout(pad=3.0)
    plt.savefig(
        output_path,
        dpi=300,
        bbox_inches="tight",
        facecolor="#f8f9fa",
        edgecolor="none",
    )
    plt.show()
    print(f"词云图已保存至:{output_path}")
if __name__ == "__main__":
print("="*50)
print("开始B站LLM相关弹幕分析任务AID爬取版")
@ -321,4 +389,35 @@ if __name__ == "__main__":
# Step 5: write the analysis result and the cleaned danmaku list to the Excel file.
print("\n【步骤5/6】写入Excel文件...")
# NOTE(review): the call below appears twice in a row with identical arguments;
# presumably an old/new line pair from a diff view — confirm that a single call is intended.
write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
# Step 6: generate the word-cloud image from the cleaned danmaku list.
print("\n【步骤6/6】生成词云可视化...")
generate_beautiful_wordcloud(all_cleaned_danmu, WORDCLOUD_OUTPUT, FONT_PATH)
# Print the core conclusions derived from the per-category mention counts.
print("\n" + "="*50)
print("核心结论B站用户对大语言模型技术的主流看法")
print("="*50)
# Each category entry is indexed by name and then by the "提及次数" key below.
opinion_stats = analysis_result["用户看法统计"]
cost_low = opinion_stats["应用成本低"]["提及次数"]
cost_high = opinion_stats["应用成本高"]["提及次数"]
# Conclusion 1: compare "low cost" vs "high cost" mentions.
# A tie (cost_low == cost_high) falls through to the "high cost" message.
if cost_low > cost_high:
print(f"1. 应用成本:{cost_low}次提及“应用成本低”,{cost_high}次提及“应用成本高”用户普遍认为LLM门槛低、易获取")
else:
print(f"1. 应用成本:{cost_high}次提及“应用成本高”,{cost_low}次提及“应用成本低”,部分用户对付费模式有顾虑")
# Conclusion 2: number of mentions of potential application domains.
domain_count = opinion_stats["潜在应用领域"]["提及次数"]
print(f"2. 潜在应用领域:{domain_count}次提及,集中在办公、学习、创作、编程等高频场景,实用性认知强烈")
# Conclusion 3: positive vs negative impact mentions.
# NOTE(review): the closing clause of this message is a fixed string and is
# printed regardless of how `positive` and `negative` compare.
positive = opinion_stats["正面影响"]["提及次数"]
negative = opinion_stats["不利影响"]["提及次数"]
print(f"3. 利弊认知:{positive}次正面反馈(高效、省事),{negative}次负面担忧(依赖、隐私泄露),整体以正面评价为主")
# Conclusion 4: expectation vs doubt mentions.
# NOTE(review): as above, the closing clause does not depend on the two counts.
expect = opinion_stats["技术期待"]["提及次数"]
doubt = opinion_stats["技术质疑"]["提及次数"]
print(f"4. 技术态度:{expect}次表达期待(更智能、多模态),{doubt}次提出质疑(不实用、夸大),多数用户对技术发展持乐观态度")
# Final summary listing the two output file paths.
print("\n任务全部完成!输出文件:")
print(f"- Excel分析结果{OUTPUT_EXCEL}")
print(f"- 词云图:{WORDCLOUD_OUTPUT}")
Loading…
Cancel
Save