You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
29 lines
981 B
29 lines
981 B
5 months ago
|
import pandas as pd
|
||
|
from collections import defaultdict
|
||
|
|
||
|
# Assumes a danmaku-fetching function get_danmaku(video_id) and a list of
# video IDs named video_ids are already defined before this point.
# NOTE(review): the substring test below presumes get_danmaku returns an
# iterable of danmaku text strings -- confirm against its implementation.

# Keywords used to decide whether a danmaku (bullet comment) is related to
# applications of AI technology.
ai_keywords = ["AI", "人工智能", "AI 技术"]

# Count how many times each AI-related danmaku text occurs across all videos.
danmaku_count = defaultdict(int)
for video_id in video_ids:
    danmaku_list = get_danmaku(video_id)
    for danmaku in danmaku_list:
        # Count each occurrence exactly once, no matter how many keywords it
        # matches. The keywords overlap ("AI" is a substring of "AI 技术"), so
        # incrementing once per matching keyword would inflate the totals.
        if any(keyword in danmaku for keyword in ai_keywords):
            danmaku_count[danmaku] += 1

# Convert the tallies into a DataFrame with one row per distinct danmaku text.
data = {'弹幕内容': list(danmaku_count.keys()), '数量': list(danmaku_count.values())}
df = pd.DataFrame(data)

# Sort by count in descending order.
sorted_df = df.sort_values(by='数量', ascending=False)

# Keep the 8 most frequent entries.
top_8_df = sorted_df.head(8)

# Write the result to an Excel workbook, omitting the index column.
top_8_df.to_excel('ai_danmaku.xlsx', index=False)
|