"""Count keyword-related danmaku (bullet comments) and export the top ones to Excel.

Run as a script, this reads one danmaku per line from ``danmaku_file_path``,
keeps the lines that contain any of ``keywords``, counts identical lines and
writes the most frequent ones to ``excel_file_path``.
"""

from collections import Counter

import pandas as pd

# Script configuration: input file, keywords to match, and output workbook.
danmaku_file_path = '弹幕.txt'
keywords = ['AI', '人工智能', '计算机']
excel_file_path = 'keyword_related_danmaku_counts.xlsx'


def read_danmakus_from_file(file_path):
    """Return the non-blank lines of a UTF-8 text file, stripped of whitespace.

    Args:
        file_path: Path of the text file holding one danmaku per line.

    Returns:
        A list of stripped lines in file order; lines that are empty or
        whitespace-only are dropped.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]


def count_danmakus(danmakus, top_n=8):
    """Return the most frequent danmaku together with their counts.

    Args:
        danmakus: Iterable of danmaku strings.
        top_n: Maximum number of entries to return (default 8). ``None``
            returns every distinct danmaku.

    Returns:
        A list of ``(danmaku, count)`` tuples ordered from most to least
        frequent.
    """
    return Counter(danmakus).most_common(top_n)


def save_to_excel(data, filename):
    """Write ``(danmaku, count)`` rows to an Excel workbook.

    Args:
        data: Sequence of two-item rows, e.g. the output of
            ``count_danmakus``.
        filename: Destination path of the workbook.

    Note:
        pandas needs an Excel writer engine (such as openpyxl for ``.xlsx``)
        to be installed for this call to succeed.
    """
    df = pd.DataFrame(data, columns=['Danmaku', 'Count'])
    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(filename, index=False)


def filter_keyword_related_danmakus(danmakus, keywords):
    """Return the danmaku that contain at least one of the keywords.

    Matching is a plain, case-sensitive substring test.

    Args:
        danmakus: Iterable of danmaku strings.
        keywords: Collection of keyword strings to look for. It is scanned
            once per danmaku, so it must be re-iterable (list, tuple, set).

    Returns:
        A list of the matching danmaku in their original order; duplicates
        are preserved so they can be counted afterwards.
    """
    return [
        danmaku
        for danmaku in danmakus
        if any(keyword in danmaku for keyword in keywords)
    ]


def main():
    """Run the read -> filter -> count -> export pipeline with the module settings."""
    # Load the raw danmaku.
    danmakus = read_danmakus_from_file(danmaku_file_path)

    # Keep only the danmaku related to the configured keywords.
    keyword_related_danmakus = filter_keyword_related_danmakus(danmakus, keywords)

    # Count them and keep the most frequent entries.
    keyword_danmaku_counts = count_danmakus(keyword_related_danmakus)

    # Save the result to Excel.
    save_to_excel(keyword_danmaku_counts, excel_file_path)

    print(f"与关键词相关的弹幕数据统计已保存到 {excel_file_path}")


# Guarded so that importing the helpers does not touch the filesystem.
if __name__ == "__main__":
    main()