You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

41 lines
1.0 KiB

import pandas as pd
from xlsxwriter import Workbook
# File paths and report size used when the script is run directly.
input_file = 'danmu.txt'  # input file: one danmaku (bullet comment) per line
output_file = '弹幕统计.xlsx'  # output Excel workbook
top_n = 20  # how many of the most frequent danmaku to report


def read_danmaku(path):
    """Return the non-empty, whitespace-stripped lines of the text file *path*.

    The file is decoded as UTF-8; a leading byte-order mark, if present, is
    dropped so it cannot become part of the first danmaku. Lines that are
    empty after stripping are skipped so they are not counted as a danmaku.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well as UTF-8 with a BOM.
    with open(path, 'r', encoding='utf-8-sig') as file:
        stripped = (line.strip() for line in file)
        return [text for text in stripped if text]


def count_top_danmaku(danmaku_list, limit=20):
    """Return the *limit* most frequent entries of *danmaku_list*.

    The result is a pandas Series indexed by danmaku text whose values are
    the occurrence counts, sorted by count in descending order. An empty
    input yields an empty Series.
    """
    # dtype=object keeps an empty input well-defined across pandas versions.
    series = pd.Series(list(danmaku_list), dtype=object, name='弹幕')
    return series.value_counts().head(limit)


def write_counts_to_excel(counts, path):
    """Write *counts* (text -> count) to a new Excel workbook at *path*.

    Row 0 holds the header; each following row holds one danmaku and its
    count, in the order given by ``counts.items()``.
    """
    # The context manager closes (and thereby saves) the workbook even if a
    # write fails part-way through.
    with Workbook(path) as workbook:
        worksheet = workbook.add_worksheet()
        # Header row.
        worksheet.write_string(0, 0, "弹幕")
        worksheet.write_string(0, 1, "数量")
        # Data rows start right below the header.
        for row, (text, count) in enumerate(counts.items(), start=1):
            # write_string avoids worksheet.write()'s content sniffing, which
            # would turn text starting with '=' into a formula and text
            # starting with 'http://' into a hyperlink.
            worksheet.write_string(row, 0, str(text))
            worksheet.write_number(row, 1, int(count))


def main():
    """Read the danmaku file, print the top entries and save them to Excel."""
    danmaku_list = read_danmaku(input_file)
    top_counts = count_top_danmaku(danmaku_list, top_n)
    # Report the statistics on stdout; the header reflects the real limit.
    print(f"排名前{top_n}的弹幕及其数量:")
    print(top_counts)
    write_counts_to_excel(top_counts, output_file)
    print(f"统计数据已保存至 {output_file}")


if __name__ == "__main__":
    main()