"""Find the most frequent ball-sport danmu (bullet comments) and export them.

The script reads one comment per line from ``all_content.txt``, keeps the
comments that mention any of six ball sports, counts identical comments and
writes the 20 most frequent ones to ``top_ball_danmu.xlsx``.
"""
from collections import Counter

import pandas as pd

# Keywords for the six ball sports; a comment is kept if it contains any of them.
keywords = ['乒乓球', '羽毛球', '排球', '篮球', '足球', '网球']


def read_danmu(path='all_content.txt'):
    """Return the comments stored in *path*, one per line.

    The line terminator is stripped so that identical comments compare equal
    even when the last line of the file has no trailing newline, and so that
    no stray newline ends up in the exported spreadsheet.

    Raises:
        OSError: if *path* cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, mode='r', encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]


def filter_ball_danmu(danmu_list, words=None):
    """Return the comments in *danmu_list* that contain any keyword.

    Args:
        danmu_list: iterable of comment strings.
        words: keywords to look for; defaults to the module-level ``keywords``.
    """
    if words is None:
        words = keywords
    return [danmu for danmu in danmu_list if any(word in danmu for word in words)]


def count_top_danmu(danmu_list, limit=20):
    """Return the *limit* most frequent comments as ``(comment, count)`` pairs.

    Pairs are ordered from most to least frequent; ties keep first-seen order.
    """
    return Counter(danmu_list).most_common(limit)


def main():
    """Run the whole pipeline: read, filter, count, print and export."""
    # Load every comment
    data_list = read_danmu()
    # Keep only the comments about ball sports
    selectdanmu = filter_ball_danmu(data_list)

    # Count the comments and show the 20 most frequent ones
    top_common = count_top_danmu(selectdanmu)
    print(top_common)
    t = pd.DataFrame(top_common, columns=['弹幕内容', '数量'])
    # Export to an Excel file
    excel_path = 'top_ball_danmu.xlsx'
    t.to_excel(excel_path, index=False)


if __name__ == '__main__':
    main()