parent
712ab4fb0f
commit
872a9d2fde
@ -0,0 +1,20 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Six ball-sport keywords; a danmu is kept when it contains any of them.
keywords = ['乒乓球', '羽毛球', '排球', '篮球', '足球', '网球']

# Path of the Excel file that receives the ranking.
excel_path = 'top_ball_danmu.xlsx'


def select_ball_danmu(lines, words=None):
    """Return the danmu in *lines* that mention at least one keyword.

    Args:
        lines: Iterable of danmu strings, one per item. Trailing line
            endings are removed so identical danmu compare equal whether
            or not they were followed by a newline in the input file.
        words: Substrings to look for. Defaults to the module-level
            ``keywords`` list.

    Returns:
        A list of the matching danmu, in input order, without line endings.
    """
    words = keywords if words is None else words
    cleaned = (line.rstrip('\r\n') for line in lines)
    return [danmu for danmu in cleaned if any(word in danmu for word in words)]


def count_top_danmu(danmu_list, top_n=20):
    """Return the *top_n* most frequent danmu as ``(text, count)`` pairs.

    Args:
        danmu_list: Iterable of danmu strings to count.
        top_n: Maximum number of entries to return.

    Returns:
        A list of ``(danmu, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common``.
    """
    return Counter(danmu_list).most_common(top_n)


def main():
    """Rank ball-related danmu from ``all_content.txt`` and export them."""
    # Read all danmu; the file object is iterated line by line.
    with open('all_content.txt', mode='r', encoding='utf-8') as f:
        selected = select_ball_danmu(f)

    # Count occurrences and keep the 20 most frequent danmu.
    top_common = count_top_danmu(selected)
    print(top_common)

    # Export the ranking to an Excel file.
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    t.to_excel(excel_path, index=False)


if __name__ == '__main__':
    main()
|
Loading…
Reference in new issue