"""Extract the most frequent AI-related bullet comments (danmaku).

Reads a text file holding one comment per line, keeps the comments that
contain at least one AI-related keyword, counts how often each distinct
comment occurs, and writes the most frequent ones to an output file while
echoing them with their counts on stdout.
"""

from collections import Counter

# Keywords that mark a comment as AI-related; matched as plain,
# case-sensitive substrings.
keywords = ['AI', '人工智能', '机器学习', '深度学习', '神经网络', '大数据', '强化学习', '计算机视觉', '计算机',
            '图形学', '算法', '预测模型', '智能']


def count_ai_danmus(lines, terms=None):
    """Count every AI-related comment found in *lines*.

    Args:
        lines: Iterable of raw comment strings (for example an open text file).
        terms: Substrings that qualify a comment as AI-related. Defaults to
            the module-level ``keywords`` list.

    Returns:
        A ``Counter`` mapping each whitespace-stripped comment to its number
        of occurrences.
    """
    if terms is None:
        terms = keywords
    # Strip before counting so that the same comment with and without a
    # trailing newline (typically the last line of the file) or with stray
    # surrounding whitespace is tallied as a single entry.
    cleaned = (line.strip() for line in lines)
    return Counter(
        text for text in cleaned
        if text and any(term in text for term in terms)
    )


def top_ai_danmus(lines, limit=8, terms=None):
    """Return the *limit* most frequent AI-related comments.

    Args:
        lines: Iterable of raw comment strings.
        limit: Maximum number of entries to return.
        terms: Optional keyword override, forwarded to ``count_ai_danmus``.

    Returns:
        A list of ``(comment, count)`` tuples ordered from most to least
        frequent.
    """
    return count_ai_danmus(lines, terms).most_common(limit)


def main(source='弹幕.txt', target='AI.txt', limit=8):
    """Read comments from *source* and save the top AI-related ones to *target*.

    Each selected comment is printed together with its count and written on
    its own line to *target*.

    Args:
        source: Path of the UTF-8 input file, one comment per line.
        target: Path of the UTF-8 output file; it is overwritten.
        limit: Number of top comments to keep.

    Raises:
        OSError: If *source* cannot be read or *target* cannot be written.
    """
    # Iterate the file object directly so the input is streamed line by line
    # instead of being loaded into memory as a whole.
    with open(source, 'r', encoding='utf-8') as file:
        ranked = top_ai_danmus(file, limit)

    with open(target, mode='w', encoding='utf-8') as f:
        for danmu, count in ranked:
            print(f"弹幕: {danmu} | 数量: {count}")
            f.write(danmu + '\n')


if __name__ == '__main__':
    main()