"""Count the most frequent danmaku (bullet comments) and export them to Excel.

Reads one danmaku per line from ``input_file``, prints the most frequent
entries with their counts, and writes the same table to ``output_file``.
"""

import pandas as pd

# File paths
input_file = 'danmu.txt'  # input file name
output_file = '弹幕统计.xlsx'  # output file name


def clean_danmaku(lines):
    """Return the whitespace-stripped, non-blank entries of *lines*.

    Args:
        lines: Iterable of raw text lines (trailing newlines allowed).

    Returns:
        A list of stripped strings, in input order, with empty ones removed.
    """
    stripped = (line.strip() for line in lines)
    # A blank line is not a danmaku; keeping it would let "" take a top rank.
    return [text for text in stripped if text]


def load_danmaku(path):
    """Read the UTF-8 text file at *path* and return its cleaned danmaku list."""
    with open(path, 'r', encoding='utf-8') as file:
        return clean_danmaku(file)


def count_top_danmaku(danmaku_list, top_n=20):
    """Return the *top_n* most frequent danmaku with their counts.

    Args:
        danmaku_list: List of danmaku strings (may be empty).
        top_n: Maximum number of distinct danmaku to keep.

    Returns:
        A pandas Series indexed by danmaku text, sorted by descending count.
    """
    df = pd.DataFrame(danmaku_list, columns=['弹幕'])
    return df['弹幕'].value_counts().head(top_n)


def write_excel(top_counts, path):
    """Write the danmaku/count pairs in *top_counts* to a workbook at *path*.

    Args:
        top_counts: Series (or any object with ``.items()``) mapping danmaku
            text to its count.
        path: Destination ``.xlsx`` file name.
    """
    # Imported here so the counting helpers can be used without XlsxWriter.
    from xlsxwriter import Workbook

    # The context manager closes the workbook even if a write below raises.
    with Workbook(path) as workbook:
        worksheet = workbook.add_worksheet()

        # Header row
        worksheet.write(0, 0, "弹幕")
        worksheet.write(0, 1, "数量")

        # Data rows start right below the header.
        for row, (text, count) in enumerate(top_counts.items(), start=1):
            # write_string keeps text such as "=1+1" or a URL as literal text
            # instead of letting write() turn it into a formula or hyperlink.
            worksheet.write_string(row, 0, text)
            worksheet.write_number(row, 1, int(count))


def main(top_n=20):
    """Run the full pipeline: load, count, print, and export the top danmaku."""
    danmaku_list = load_danmaku(input_file)
    top_danmaku = count_top_danmaku(danmaku_list, top_n)

    # Print the statistics; the header reflects the limit actually applied.
    print(f"排名前{top_n}的弹幕及其数量:")
    print(top_danmaku)

    write_excel(top_danmaku, output_file)

    print(f"统计数据已保存至 {output_file}")


if __name__ == "__main__":
    main()