|
|
|
|
@ -68,7 +68,7 @@ def fetch_danmakus(aid):
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
def get_top_videos_aids(keyword, max_videos=120):
|
|
|
|
|
"""根据关键词获取综合排序前120条视频的AID"""
|
|
|
|
|
"""根据关键词获取综合排序前N条视频的AID"""
|
|
|
|
|
aids = []
|
|
|
|
|
page = 1
|
|
|
|
|
page_size = 30
|
|
|
|
|
@ -272,7 +272,75 @@ def write_to_excel(analysis_result, danmu_list, output_path):
|
|
|
|
|
|
|
|
|
|
wb.save(output_path)
|
|
|
|
|
print(f"\nExcel文件已保存至:{output_path}")
|
|
|
|
|
|
|
|
|
|
# 模块5:词云可视化
|
|
|
|
|
def generate_beautiful_wordcloud(danmu_list, output_path, font_path):
    """Render the danmaku texts as a styled word cloud and save it to disk.

    Args:
        danmu_list: Iterable of danmaku strings. They are joined with single
            spaces and passed to ``WordCloud.generate``.
        output_path: Destination path of the rendered figure.
        font_path: Path to the font file used for both the cloud and the
            figure title (the title contains CJK characters, so the font
            presumably has to cover them).

    Raises:
        ValueError: If ``danmu_list`` contains no non-blank text. WordCloud
            itself raises ``ValueError`` when it has no words to draw, so the
            exception type seen by callers is unchanged; failing here just
            avoids the wasted setup and gives a clearer message.
    """
    text = " ".join(danmu_list)
    if not text.strip():
        raise ValueError("弹幕列表为空,无法生成词云图")

    # Imported locally (as in the original) so the module does not need this
    # name at file level.
    from matplotlib.font_manager import FontProperties

    canvas_width, canvas_height = 1200, 800
    background = "#f8f9fa"

    # Custom stop words: filler words and giveaway/"triple-like" spam phrases.
    stopwords = {
        "工具", "使用", "应用", "怎么", "如何", "领到了", "已三连",
        "可以", "能够", "觉得", "真的", "太", "很", "非常", "一下", "一个", "什么", "哪里", "时候",
    }

    def create_rounded_rect_mask(width, height, radius=80):
        """Return a uint8 mask: 0 where words may be drawn, 255 elsewhere.

        WordCloud treats white (255) mask entries as masked out, so only the
        central region that is inset by ``radius`` on every side is drawable.

        NOTE(review): despite the name, the corners are not rounded; the
        drawable area is a plain inset rectangle. The pixel output is kept
        identical to the previous nested-loop version so the rendered layout
        does not change.
        """
        mask = np.full((height, width), 255, dtype=np.uint8)
        # Single vectorised assignment instead of a per-pixel Python loop.
        # The guard mirrors the old loops, which were empty ranges whenever
        # the inset left no interior.
        if height > 2 * radius and width > 2 * radius:
            mask[radius:height - radius, radius:width - radius] = 0
        return mask

    def gradient_color(word, font_size, position, orientation, random_state, **kwargs):
        """Pick an HSL colour whose lightness grows with font size."""
        hue = 200 + random_state.randint(0, 50)
        saturation = 70 + random_state.randint(0, 30)
        # Cap the lightness: uncapped, a font size of 300 already yields 100%
        # (white on the near-white background) and larger sizes produce an
        # invalid HSL value. Values for font sizes up to 150 are unchanged.
        lightness = min(40 + (font_size / 100) * 20, 70)
        # Integer percentage keeps the colour string in the most widely
        # accepted "hsl(h, s%, l%)" form.
        return f"hsl({hue}, {saturation}%, {int(lightness)}%)"

    wc = WordCloud(
        width=canvas_width,
        height=canvas_height,
        font_path=font_path,
        mask=create_rounded_rect_mask(canvas_width, canvas_height),
        background_color=background,
        stopwords=stopwords,
        max_words=300,
        font_step=3,
        random_state=42,
        relative_scaling=0.6,
        color_func=gradient_color,
        prefer_horizontal=0.7,
    ).generate(text)

    fig = plt.figure(figsize=(15, 10), facecolor=background)
    ax = plt.gca()
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")

    title_font = FontProperties(fname=font_path, size=28)
    plt.title(
        "B站LLM相关弹幕词云分析",
        fontproperties=title_font,
        fontweight="bold",
        pad=30,
        color="#2c3e50",
    )

    # Light grey frame styling around the image.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_color("#dee2e6")
        spine.set_linewidth(2)

    plt.tight_layout(pad=3.0)
    plt.savefig(
        output_path,
        dpi=300,
        bbox_inches="tight",
        facecolor=background,
        edgecolor="none",
    )
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"词云图已保存至:{output_path}")
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
print("="*50)
|
|
|
|
|
print("开始B站LLM相关弹幕分析任务(AID爬取版)")
|
|
|
|
|
@ -321,4 +389,35 @@ if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
# 步骤5:写入Excel
|
|
|
|
|
print("\n【步骤5/6】写入Excel文件...")
|
|
|
|
|
write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
|
|
|
|
|
write_to_excel(analysis_result, all_cleaned_danmu, OUTPUT_EXCEL)
|
|
|
|
|
# 步骤6:生成词云图
|
|
|
|
|
print("\n【步骤6/6】生成词云可视化...")
|
|
|
|
|
generate_beautiful_wordcloud(all_cleaned_danmu, WORDCLOUD_OUTPUT, FONT_PATH)
|
|
|
|
|
|
|
|
|
|
# 核心结论输出
|
|
|
|
|
print("\n" + "="*50)
|
|
|
|
|
print("核心结论:B站用户对大语言模型技术的主流看法")
|
|
|
|
|
print("="*50)
|
|
|
|
|
opinion_stats = analysis_result["用户看法统计"]
|
|
|
|
|
|
|
|
|
|
cost_low = opinion_stats["应用成本低"]["提及次数"]
|
|
|
|
|
cost_high = opinion_stats["应用成本高"]["提及次数"]
|
|
|
|
|
if cost_low > cost_high:
|
|
|
|
|
print(f"1. 应用成本:{cost_low}次提及“应用成本低”,{cost_high}次提及“应用成本高”,用户普遍认为LLM门槛低、易获取")
|
|
|
|
|
else:
|
|
|
|
|
print(f"1. 应用成本:{cost_high}次提及“应用成本高”,{cost_low}次提及“应用成本低”,部分用户对付费模式有顾虑")
|
|
|
|
|
|
|
|
|
|
domain_count = opinion_stats["潜在应用领域"]["提及次数"]
|
|
|
|
|
print(f"2. 潜在应用领域:{domain_count}次提及,集中在办公、学习、创作、编程等高频场景,实用性认知强烈")
|
|
|
|
|
|
|
|
|
|
positive = opinion_stats["正面影响"]["提及次数"]
|
|
|
|
|
negative = opinion_stats["不利影响"]["提及次数"]
|
|
|
|
|
print(f"3. 利弊认知:{positive}次正面反馈(高效、省事),{negative}次负面担忧(依赖、隐私泄露),整体以正面评价为主")
|
|
|
|
|
|
|
|
|
|
expect = opinion_stats["技术期待"]["提及次数"]
|
|
|
|
|
doubt = opinion_stats["技术质疑"]["提及次数"]
|
|
|
|
|
print(f"4. 技术态度:{expect}次表达期待(更智能、多模态),{doubt}次提出质疑(不实用、夸大),多数用户对技术发展持乐观态度")
|
|
|
|
|
|
|
|
|
|
print("\n任务全部完成!输出文件:")
|
|
|
|
|
print(f"- Excel分析结果:{OUTPUT_EXCEL}")
|
|
|
|
|
print(f"- 词云图:{WORDCLOUD_OUTPUT}")
|