"""Collect and rank danmu (bullet comments) that mention tracked keywords."""
# get_danmu(urls) is provided by the sibling ``get_danmu`` module.
from get_danmu import get_danmu


def is_related(danmu, keyword_string):
    """Return True if *danmu* contains any of the given search strings.

    Args:
        danmu: Text of a single danmu.
        keyword_string: Iterable of substrings that identify one keyword.
            NOTE(review): a plain ``str`` passed here is iterated character
            by character, so callers should pass a list/tuple of strings.

    Returns:
        True when at least one substring occurs in ``danmu``; False
        otherwise, including when ``keyword_string`` is empty.
    """
    return any(string in danmu for string in keyword_string)


def danmu_about_AI(urls, keywords, top_n=8):
    """Rank keywords by how many danmu mention them and gather those danmu.

    Each danmu is attributed to at most one keyword: the first entry of
    ``keywords`` (in mapping order) whose search strings match it.

    Args:
        urls: Passed through unchanged to ``get_danmu``.
        keywords: Mapping of keyword name -> iterable of search strings.
        top_n: How many of the most frequent keywords to keep. Defaults to
            8, which was the previously hard-coded value.

    Returns:
        A tuple ``(top_danmu, top_keywords)`` where ``top_keywords`` is a
        list of ``(keyword, count)`` pairs sorted by descending count (ties
        keep the order of ``keywords``), and ``top_danmu`` is the list of
        danmu attributed to those keywords, grouped in the same order.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    # A negative value would make the slice below drop entries from the
    # end instead of limiting the result, so reject it up front.
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # presumably an iterable of danmu strings; verify against get_danmu
    all_danmu = get_danmu(urls)

    # Danmu attributed to each keyword, in the order they were seen.
    keyword_dist = {keyword: [] for keyword in keywords}

    for danmu in all_danmu:
        for keyword, keyword_string in keywords.items():
            if is_related(danmu, keyword_string):
                keyword_dist[keyword].append(danmu)
                # Only the first matching keyword gets the danmu.
                break

    # The count of a keyword is simply the number of danmu attributed to it.
    keyword_count = {
        keyword: len(matched) for keyword, matched in keyword_dist.items()
    }

    # sorted() is stable, so equal counts keep the order of ``keywords``.
    top_keywords = sorted(
        keyword_count.items(), key=lambda item: item[1], reverse=True
    )[:top_n]

    # Flatten the danmu of the selected keywords, most frequent first.
    top_danmu = []
    for keyword, _ in top_keywords:
        top_danmu.extend(keyword_dist[keyword])

    return top_danmu, top_keywords