parent
6ddbae357c
commit
d7b14b7bf7
@ -0,0 +1,37 @@
|
|||||||
|
from get_danmu import get_danmu
|
||||||
|
#def get_danmu(urls):
|
||||||
|
|
||||||
|
|
||||||
|
def is_related(danmu: str, keyword_string) -> bool:
    """Return True if *danmu* contains at least one of the given keywords.

    Args:
        danmu: Text of a single danmu (bullet comment).
        keyword_string: Iterable of substrings to look for. Matching is a
            plain, case-sensitive substring test.

    Returns:
        True as soon as any keyword occurs in ``danmu``; False when none
        does, including when ``keyword_string`` is empty.
    """
    return any(keyword in danmu for keyword in keyword_string)
|
||||||
|
|
||||||
|
|
||||||
|
def danmu_about_AI(urls, keywords, top_n=8):
    """Collect the danmu that mention the most frequently discussed keywords.

    Every danmu fetched via ``get_danmu(urls)`` is attributed to the FIRST
    keyword (in ``keywords`` iteration order) whose search strings match it,
    so a single danmu is never counted under two keywords.

    Args:
        urls: Passed through unchanged to ``get_danmu``.
        keywords: Mapping of keyword name -> iterable of substrings that
            mark a danmu as related to that keyword.
        top_n: How many of the most-mentioned keywords to keep. Defaults to
            8, the value that was previously hard-coded.

    Returns:
        A ``(top_danmu, top_keywords)`` tuple where ``top_keywords`` is a
        list of ``(keyword, count)`` pairs sorted by descending count, and
        ``top_danmu`` is the flat list of danmu attributed to those
        keywords, grouped in the same keyword order.
    """
    all_danmu = get_danmu(urls)

    # Per-keyword result containers: match count and the matching danmu.
    keyword_count = {name: 0 for name in keywords}
    keyword_dist = {name: [] for name in keywords}

    for danmu in all_danmu:
        for keyword, keyword_string in keywords.items():
            if is_related(danmu, keyword_string):
                keyword_count[keyword] += 1
                keyword_dist[keyword].append(danmu)
                # Attribute each danmu to the first matching keyword only.
                break

    # sorted() is stable, so keywords with equal counts keep their
    # original order from ``keywords``.
    top_8_AI = sorted(
        keyword_count.items(), key=lambda item: item[1], reverse=True
    )[:top_n]

    # Flatten the danmu of the selected keywords, preserving rank order.
    top_8_danmu = [
        danmu for keyword, _ in top_8_AI for danmu in keyword_dist[keyword]
    ]
    return top_8_danmu, top_8_AI
|
Loading…
Reference in new issue