You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
2.4 KiB

"""
读取弹幕数据并统计频率然后将统计结果保存到Excel文件中
"""
import pandas as pd
def load_danmu(file_path):
    """Read the danmu file and return its lines.

    Args:
        file_path: Path of the text file to read; it is decoded as UTF-8.

    Returns:
        A list with one string per line of the file. Line endings are kept,
        so callers that need clean text must strip them.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        lines = list(source)
    return lines
"""
读取弹幕数据并统计频率然后将统计结果保存到Excel文件中
"""
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
def load_danmu(file_path):
    """Load every line of a UTF-8 encoded danmu file.

    Args:
        file_path: Location of the text file to open for reading.

    Returns:
        The file's lines as a list of strings, each still carrying its
        trailing newline (if the file had one there).
    """
    with open(file_path, 'r', encoding='utf-8') as danmu_file:
        raw_lines = danmu_file.readlines()
    return raw_lines
def count_danmu(danmu_list):
    """Count how many times each danmu message occurs.

    Each entry is stripped of leading/trailing whitespace before counting,
    so entries that differ only by surrounding whitespace or a trailing
    newline are tallied together. Blank entries are counted under the
    empty string.

    Args:
        danmu_list: Iterable of strings, one per danmu message.

    Returns:
        A plain dict mapping each stripped message to its number of
        occurrences, with keys in first-seen order.
    """
    # Stripping a string is trivial CPU work that never releases the GIL,
    # so dispatching it to a thread pool only adds per-item scheduling
    # overhead. A single-pass Counter is both simpler and faster.
    return dict(Counter(danmu.strip() for danmu in danmu_list))
def save_to_excel(all_danmus, excel_file):
    """Write the danmu frequency table to an Excel file.

    Args:
        all_danmus: Mapping of danmu text to its occurrence count.
        excel_file: Destination passed straight to ``DataFrame.to_excel``.

    The sheet has two columns, ``danmu`` and ``count``, ordered from the
    highest count to the lowest; entries with equal counts keep the order
    they had in the mapping. No index column is written.
    """
    rows = list(all_danmus.items())
    # list.sort is stable, so ties stay in the mapping's iteration order.
    rows.sort(key=lambda pair: pair[1], reverse=True)
    table = pd.DataFrame(rows, columns=['danmu', 'count'])
    table.to_excel(excel_file, index=False)
def main(danmu_file_path='/output/danmu.txt', excel_file='/output/All_Danmu.xlsx'):
    """Run the whole pipeline: load danmu lines, count them, export to Excel.

    Args:
        danmu_file_path: Text file to read the danmu messages from.
            Defaults to the path the script previously hard-coded.
        excel_file: Where the frequency table is written. Defaults to the
            path the script previously hard-coded.

    Calling ``main()`` with no arguments behaves exactly as before; the
    parameters only make the paths overridable.
    """
    danmu_list = load_danmu(danmu_file_path)
    all_danmus = count_danmu(danmu_list)
    save_to_excel(all_danmus, excel_file)
    print("所有弹幕数据统计完成并已保存到Excel表格")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
def count_danmu(danmu_list):
    """Tally the occurrences of every danmu message.

    Args:
        danmu_list: Iterable of strings, one per danmu message.

    Returns:
        A dict mapping each message, stripped of surrounding whitespace,
        to the number of times it appeared. Keys are in first-seen order;
        blank entries are counted under the empty string.
    """
    tally = {}
    for raw_line in danmu_list:
        message = raw_line.strip()
        if message in tally:
            tally[message] += 1
        else:
            tally[message] = 1
    return tally
def save_to_excel(all_danmus, excel_file):
    """Export danmu counts to an Excel file, most frequent first.

    Args:
        all_danmus: Mapping of danmu text to the number of times it occurred.
        excel_file: Target handed unchanged to ``DataFrame.to_excel``.

    The output has the columns ``danmu`` and ``count`` and no index column.
    Rows are in descending count order, with ties left in the mapping's
    own order.
    """
    def by_count(entry):
        # entry is a (text, count) pair; rank on the count only.
        return entry[1]

    ranked = sorted(all_danmus.items(), key=by_count, reverse=True)
    frame = pd.DataFrame(ranked, columns=['danmu', 'count'])
    frame.to_excel(excel_file, index=False)
def main(danmu_file_path='/output/danmu.txt', excel_file='/output/All_Danmu.xlsx'):
    """Load the danmu file, count each message, and save the counts to Excel.

    Args:
        danmu_file_path: Text file the danmu messages are read from.
            Defaults to the previously hard-coded input path.
        excel_file: Path the frequency table is written to. Defaults to
            the previously hard-coded output path.

    With no arguments the behavior is unchanged; the parameters exist so
    the same pipeline can be pointed at other files.
    """
    danmu_list = load_danmu(danmu_file_path)
    all_danmus = count_danmu(danmu_list)
    save_to_excel(all_danmus, excel_file)
    print("所有弹幕数据统计完成并已保存到Excel表格")
# Script entry point: importing this module must not trigger the pipeline.
if __name__ == "__main__":
    main()