ADD file via upload

3 months ago · 4d1bb60e99
parent 89cda1dbb3
commit 4d1bb60e99
1 changed files with 36 additions and 0 deletions
--- a/statistic.py
+++ b/statistic.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+import jieba
+import pandas as pd
+from collections import Counter
+
+class DanmakuStatistic:
+    def __init__(self, danmu_path, excel_path):
+        self.danmu_path = danmu_path
+        self.excel_path = excel_path
+        
+        # 读取弹幕数据
+        with open(danmu_path, "r", encoding="utf-8") as f:
+            self.danmu_list = [line.strip() for line in f if line.strip()]
+        
+        # 自定义停用词（可根据需求补充）
+        self.stop_words = {
+            "已三连", "求资料", "打卡", "不错", "很好", "牛逼", "卧槽",
+            "学习", "分享", "感谢", "点赞", "三连", "教程", "B站", "老师"
+        }
+
+    def count_top8(self):
+        """统计Top8高频词"""
+        words = []
+        for danmu in self.danmu_list:
+            # 分词并过滤
+            seg_words = [w for w in jieba.cut(danmu) if len(w) > 1 and w not in self.stop_words]
+            words.extend(seg_words)
+        return Counter(words).most_common(8)
+
+    def export_to_excel(self):
+        """导出统计结果到Excel"""
+        top8 = self.count_top8()
+        df = pd.DataFrame(top8, columns=["关键词", "词频"])
+        df.to_excel(self.excel_path, index=False)
+        print(f"统计数据已保存至{self.excel_path}")
+        return top8