You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.0 KiB
41 lines
1.0 KiB
2 months ago
|
import pandas as pd
|
||
|
from xlsxwriter import Workbook
|
||
|
|
||
|
# Count how often each danmaku (bullet comment) occurs in a text file,
# print the most frequent ones and save the same ranking to an Excel workbook.

# File paths
input_file = 'danmu.txt'  # input file, one danmaku per line
output_file = '弹幕统计.xlsx'  # output workbook

# How many of the most frequent danmaku to report. The console heading is
# built from this value so it can no longer disagree with the ranking size.
top_n = 20

# Read the danmaku from the text file. Lines that are empty after stripping
# are skipped so that blank lines are not counted as a danmaku of their own.
with open(input_file, 'r', encoding='utf-8') as file:
    danmaku_list = [text for text in (line.strip() for line in file) if text]

# Build the DataFrame
df = pd.DataFrame(danmaku_list, columns=['弹幕'])

# Count the occurrences of each danmaku (value_counts sorts by descending
# frequency) and keep the first `top_n` entries.
counts = df['弹幕'].value_counts()
top_20 = counts.head(top_n)

# Print the ranking
print(f"排名前{top_n}的弹幕及其数量:")
print(top_20)

# Create the Excel file. The context manager closes (and thereby writes) the
# workbook even if an error occurs while the rows are being filled in.
with Workbook(output_file) as workbook:
    worksheet = workbook.add_worksheet()

    # Header row
    worksheet.write_string(0, 0, "弹幕")
    worksheet.write_string(0, 1, "数量")

    # Data rows start at row 1, directly below the header.
    for row, (text, count) in enumerate(top_20.items(), start=1):
        # write_string() stores the text verbatim; the generic write() would
        # turn a danmaku starting with '=' into a formula or one starting
        # with 'http://' into a hyperlink.
        worksheet.write_string(row, 0, text)
        # Convert the pandas/numpy integer to a plain int before writing.
        worksheet.write_number(row, 1, int(count))

print(f"统计数据已保存至 {output_file}")
|