You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

33 lines
1.3 KiB

from get_danmu import get_danmu
def is_related(danmu, keyword_strings):
    """Return True if *danmu* contains at least one of the given keywords.

    Args:
        danmu: The danmu (bullet comment) text to inspect.
        keyword_strings: An iterable of keyword strings. A bare ``str`` is
            treated as a single keyword instead of being iterated
            character by character.

    Returns:
        ``True`` when any non-empty keyword occurs as a substring of
        *danmu*, otherwise ``False``.
    """
    if isinstance(keyword_strings, str):
        keyword_strings = (keyword_strings,)
    # Skip empty keywords: ``"" in text`` is always True, so a stray empty
    # entry would mark every danmu as related.
    return any(keyword in danmu for keyword in keyword_strings if keyword)
def danmu_about_AI(urls, keywords, top_n=8):
    """Group danmu by keyword category and return the most frequent categories.

    Every danmu obtained from ``get_danmu(urls)`` is assigned to the first
    category in *keywords* (in mapping order) for which ``is_related``
    returns true, so a single danmu is counted at most once.

    Args:
        urls: Passed through unchanged to ``get_danmu``.
        keywords: Mapping of category name -> keyword strings for that
            category.
        top_n: How many of the most frequent categories to keep.
            Defaults to 8.

    Returns:
        A tuple ``(top_danmu, top_categories)`` where ``top_categories`` is
        a list of ``(category, count)`` pairs sorted by descending count
        (ties keep the order of *keywords*), and ``top_danmu`` is the list
        of all danmu belonging to those categories, in that same category
        order.

    Raises:
        ValueError: If *top_n* is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # NOTE(review): assumes get_danmu returns an iterable of danmu strings
    # -- confirm against get_danmu.
    all_danmu = get_danmu(urls)

    # Danmu matched to each category; the per-category count is simply the
    # length of its list, so no separate counter needs to be maintained.
    keyword_dist = {keyword: [] for keyword in keywords}

    for danmu in all_danmu:
        for keyword, keyword_strings in keywords.items():
            if is_related(danmu, keyword_strings):
                keyword_dist[keyword].append(danmu)
                # First matching category wins; do not count a danmu twice.
                break

    keyword_count = [
        (keyword, len(matched)) for keyword, matched in keyword_dist.items()
    ]
    # sorted() is stable, so categories with equal counts keep their
    # original order from *keywords*.
    top_AI = sorted(keyword_count, key=lambda item: item[1], reverse=True)[:top_n]

    # Collect every danmu that belongs to one of the top categories.
    top_danmu = []
    for keyword, _ in top_AI:
        top_danmu.extend(keyword_dist[keyword])
    return top_danmu, top_AI