You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

70 lines
2.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import re

import pandas as pd
def merge_csv_to_excel(folder_path, output_file):
    """Merge every danmu CSV file in a folder into one Excel workbook.

    Each file in ``folder_path`` whose name ends with '.csv' is read with
    ``pd.read_csv`` and the rows are stacked into a single table, which is
    written to ``output_file`` on a sheet named 'MergedData' without the
    index column.

    Args:
        folder_path: Directory that contains the CSV files.
        output_file: Path of the Excel file to create.

    Returns:
        None. If the folder holds no CSV files an empty sheet is written.
    """
    frames = []
    # os.listdir gives no ordering guarantee; sort so the merged row order
    # is reproducible from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):  # only CSV files are merged
            continue
        file_path = os.path.join(folder_path, file_name)
        print(f"正在读取文件: {file_path}")
        frames.append(pd.read_csv(file_path))

    # Concatenate once instead of once per file: repeated concat copies the
    # accumulated data on every iteration. pd.concat rejects an empty list,
    # so fall back to an empty frame when nothing was read.
    if frames:
        all_data = pd.concat(frames, ignore_index=True)
    else:
        all_data = pd.DataFrame()

    # Save the merged table to the new Excel file.
    with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
        all_data.to_excel(writer, index=False, sheet_name='MergedData')
    print(f"所有弹幕csv文件已合并并保存到: {output_file}")
def analyze_danmu(input_file, output_nums_file, top_n=8, keywords=None):
    """Rank the most frequent AI-related danmu in a merged Excel file.

    Reads ``input_file``, keeps the rows whose '弹幕文本' column contains at
    least one keyword, counts how often each distinct text occurs, prints
    the ``top_n`` most frequent texts and writes them to
    ``output_nums_file`` (sheet 'TopDanmu', columns '弹幕文本' and
    '出现次数').

    Args:
        input_file: Path of the Excel file to analyse.
        output_nums_file: Path of the Excel file that receives the ranking.
        top_n: How many of the most frequent texts to keep. Defaults to 8.
        keywords: Iterable of literal, case-sensitive substrings to search
            for. ``None`` selects the built-in AI keyword list; an empty
            iterable matches nothing.

    Returns:
        None. When the '弹幕文本' column is missing, the available column
        names are printed and no output file is written.
    """
    text_column = '弹幕文本'
    df = pd.read_excel(input_file)

    # The danmu text is expected in the '弹幕文本' column.
    if text_column not in df.columns:
        print("弹幕数据列未找到,请检查列名。")
        print("读取的列名:", df.columns.tolist())  # aid debugging
        return

    # Keywords related to AI technology and its applications.
    if keywords is None:
        keywords = ['AI', '人工智能', '机器学习', '深度学习']
    keywords = list(keywords)

    text = df[text_column]
    if keywords:
        # Escape every keyword so it is matched literally rather than being
        # interpreted as a regular-expression fragment.
        pattern = '|'.join(re.escape(keyword) for keyword in keywords)
        # Match on a str-cast copy: the .str accessor raises AttributeError
        # on a column that was parsed as purely numeric. Missing cells
        # would become the text 'nan' after the cast, so exclude them
        # explicitly.
        mask = text.astype(str).str.contains(pattern, na=False) & text.notna()
    else:
        mask = pd.Series(False, index=df.index)
    filtered_danmu = df[mask]

    # Count the occurrences of each distinct danmu and keep the top entries.
    danmu_counts = filtered_danmu[text_column].value_counts()
    top_danmu = danmu_counts.head(top_n)

    # Show the result on the console.
    print(f"数量排名前{top_n}的弹幕")
    print(top_danmu)

    # Write the ranking to the output workbook.
    top_danmu_df = top_danmu.reset_index()
    top_danmu_df.columns = ['弹幕文本', '出现次数']  # rename the columns
    with pd.ExcelWriter(output_nums_file, engine='xlsxwriter') as writer:
        top_danmu_df.to_excel(writer, index=False, sheet_name='TopDanmu')
    print(f"分析结果已保存到: {output_nums_file}")
if __name__ == '__main__':
    # Script entry point: merge the CSV files first, then analyse the result.
    csv_dir = '弹幕csv'  # replace with the path of your CSV folder
    merged_xlsx = '合并弹幕.xlsx'  # name of the merged output workbook
    ranking_xlsx = 'ai.xlsx'  # name of the analysis output workbook

    # Step 1: merge the CSV files into one Excel workbook.
    merge_csv_to_excel(csv_dir, merged_xlsx)

    # Step 2: analyse the merged danmu data and save the ranking.
    analyze_danmu(merged_xlsx, ranking_xlsx)