"""Classify danmu (bullet comments) by keyword group and report the top groups.

Recovered from the patch "ADD file via upload", which created
``get_danmu_about_AI.py``.
"""

from get_danmu import get_danmu


def is_related(danmu, keyword_strings):
    """Return True if any string in *keyword_strings* occurs inside *danmu*.

    Args:
        danmu: The text to search; tested with the ``in`` operator.
        keyword_strings: Iterable of strings to look for in *danmu*.

    Returns:
        True when at least one keyword string is contained in *danmu*,
        False otherwise (including when *keyword_strings* is empty).
    """
    return any(keyword in danmu for keyword in keyword_strings)


def danmu_about_AI(urls, keywords, top_n=8):
    """Classify and count danmu by keyword group; return the top groups.

    Args:
        urls: Passed unchanged to ``get_danmu``.
            NOTE(review): presumably a collection of video URLs, and
            ``get_danmu`` presumably returns an iterable of danmu strings;
            confirm against ``get_danmu``.
        keywords: Mapping of keyword label -> iterable of strings that
            indicate that label.
        top_n: How many of the most frequent labels to keep. Defaults to 8,
            the value that was previously hard-coded.

    Returns:
        A ``(top_danmu, top_keywords)`` tuple where ``top_keywords`` is a list
        of ``(label, count)`` pairs sorted by count in descending order, and
        ``top_danmu`` is the concatenation of the danmu collected for those
        labels, in the same label order.

    Raises:
        ValueError: If *top_n* is negative (a negative slice bound would
            silently drop labels from the end instead of limiting the count).
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    all_danmu = get_danmu(urls)

    # Per-label match count and the danmu that matched each label.
    keyword_count = {keyword: 0 for keyword in keywords}
    keyword_dist = {keyword: [] for keyword in keywords}

    for danmu in all_danmu:
        for keyword, keyword_strings in keywords.items():
            if is_related(danmu, keyword_strings):
                keyword_count[keyword] += 1
                keyword_dist[keyword].append(danmu)
                # Each danmu is attributed only to the first label it
                # matches, in the iteration order of `keywords`.
                break

    # sorted() is stable, so labels with equal counts keep the order in which
    # they appear in `keywords`.
    top_keywords = sorted(
        keyword_count.items(), key=lambda item: item[1], reverse=True
    )[:top_n]

    # Gather every danmu belonging to the selected labels.
    top_danmu = []
    for keyword, _ in top_keywords:
        top_danmu.extend(keyword_dist[keyword])

    return top_danmu, top_keywords