parent
b5abdb95f4
commit
e5e11662a5
@ -0,0 +1,41 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# 读取弹幕文件
|
||||||
|
def read_danmakus_from_file(file_path):
    """Read danmaku (bullet comments) from a text file, one per line.

    Every line is stripped of leading and trailing whitespace, and lines
    that are empty after stripping are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of the non-empty, stripped lines in file order.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also consumes a
    # leading byte-order mark. str.strip() does not treat U+FEFF as
    # whitespace, so with plain 'utf-8' the first danmaku of a BOM-prefixed
    # file would be tallied as a different string from its duplicates.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [text for text in stripped_lines if text]
|
||||||
|
|
||||||
|
# 统计弹幕
|
||||||
|
def count_danmakus(danmakus, top_n=8):
    """Count identical danmaku and return the most frequent ones.

    Args:
        danmakus: Iterable of hashable danmaku values (strings in this script).
        top_n: Maximum number of entries to return. Defaults to 8; passing
            ``None`` returns every distinct danmaku.

    Returns:
        A list of ``(danmaku, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common``.
    """
    return Counter(danmakus).most_common(top_n)
|
||||||
|
|
||||||
|
# 保存到Excel
|
||||||
|
def save_to_excel(data, filename):
    """Write tabular danmaku statistics to an Excel file.

    Args:
        data: Rows to store; each row supplies the values for the
            'Danmaku' and 'Count' columns (e.g. ``(text, count)`` tuples).
        filename: Destination passed to ``DataFrame.to_excel``.
    """
    column_names = ['Danmaku', 'Count']
    frame = pd.DataFrame(data, columns=column_names)
    # index=False keeps the DataFrame's row index out of the sheet.
    frame.to_excel(filename, index=False)
|
||||||
|
|
||||||
|
# 筛选与关键词相关的弹幕
|
||||||
|
def filter_keyword_related_danmakus(danmakus, keywords, *, case_sensitive=True):
    """Keep the danmaku that contain at least one of the given keywords.

    Matching is plain substring containment.

    Args:
        danmakus: Iterable of danmaku strings.
        keywords: Iterable of keyword strings. It is materialized once, so a
            one-shot iterator or generator is safe to pass.
        case_sensitive: When True (the default) keywords must match exactly.
            When False, both sides are case-folded before comparison so that
            e.g. 'AI' also matches 'ai'.

    Returns:
        A list of the matching danmaku, unmodified and in their original order.
    """
    # Materialize up front: the keywords are scanned once per danmaku, and a
    # one-shot iterator would be exhausted after the first danmaku.
    keyword_list = list(keywords)
    if not case_sensitive:
        keyword_list = [keyword.casefold() for keyword in keyword_list]

    matches = []
    for danmaku in danmakus:
        haystack = danmaku if case_sensitive else danmaku.casefold()
        if any(keyword in haystack for keyword in keyword_list):
            matches.append(danmaku)
    return matches
|
||||||
|
|
||||||
|
# 读取弹幕数据
|
||||||
|
# Configuration: input file, keywords of interest, and output workbook.
danmaku_file_path = '弹幕.txt'
keywords = ['AI', '人工智能', '计算机']
excel_file_path = 'keyword_related_danmaku_counts.xlsx'

# Load all danmaku, keep those mentioning a keyword, then tally the most
# frequent ones.
danmakus = read_danmakus_from_file(danmaku_file_path)
keyword_related_danmakus = filter_keyword_related_danmakus(danmakus, keywords)
keyword_danmaku_counts = count_danmakus(keyword_related_danmakus)

# Write the tally to Excel and report where it was saved.
save_to_excel(keyword_danmaku_counts, excel_file_path)
print(f"与关键词相关的弹幕数据统计已保存到 {excel_file_path}")
|
Loading…
Reference in new issue