You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
1.3 KiB
33 lines
1.3 KiB
from get_danmu import get_danmu
|
|
|
|
def is_related(danmu, keyword_strings):
    """Return True if the danmu contains at least one of the keyword strings.

    Args:
        danmu: Text of a single danmu (bullet comment).
        keyword_strings: Iterable of substrings to search for. A bare
            ``str`` is treated as one keyword instead of being iterated
            character by character.

    Returns:
        True if any non-empty keyword string occurs in ``danmu``,
        otherwise False (including when no keyword strings are given).
    """
    # Iterating a plain string would test each character separately, so
    # "AI" would match any text containing "A" or "I". Wrap it instead.
    if isinstance(keyword_strings, str):
        keyword_strings = (keyword_strings,)

    # Skip empty keywords: "" is a substring of every string and would make
    # every danmu look related.
    return any(keyword and keyword in danmu for keyword in keyword_strings)
|
|
|
|
def danmu_about_AI(urls, keywords, top_n=8):
    """Group danmu by keyword and return the most frequently matched keywords.

    Each danmu yielded by ``get_danmu(urls)`` is assigned to the first
    keyword (in the iteration order of ``keywords``) whose search strings it
    matches according to ``is_related``, so a danmu is counted at most once.

    Args:
        urls: Passed unchanged to ``get_danmu``.
        keywords: Mapping of keyword -> collection of search strings that
            indicate the keyword is being discussed.
        top_n: How many of the most frequent keywords to return. Defaults
            to 8.

    Returns:
        A tuple ``(top_danmu, top_keywords)``:
        ``top_keywords`` is a list of ``(keyword, count)`` pairs sorted by
        count in descending order (ties keep the order of ``keywords``),
        truncated to ``top_n`` entries; ``top_danmu`` is a flat list of
        all danmu assigned to those keywords, in the same keyword order.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    # Validate before fetching so a bad argument fails fast.
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Danmu collected per keyword; the count is simply the list length, so
    # no separate counter dict has to be kept in sync.
    matched = {keyword: [] for keyword in keywords}

    for danmu in get_danmu(urls):
        for keyword, keyword_strings in keywords.items():
            if is_related(danmu, keyword_strings):
                matched[keyword].append(danmu)
                # First match wins: do not count one danmu for several keywords.
                break

    # sorted() is stable, so keywords with equal counts keep their original order.
    top_keywords = sorted(
        ((keyword, len(hits)) for keyword, hits in matched.items()),
        key=lambda item: item[1],
        reverse=True,
    )[:top_n]

    # Flatten the danmu belonging to the selected keywords.
    top_danmu = []
    for keyword, _ in top_keywords:
        top_danmu.extend(matched[keyword])

    return top_danmu, top_keywords
|