You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# Legacy pass: collect every AI-related bullet comment (danmaku) from the
# scraped comment file, echo the matches to stdout, and append them to two
# output files.
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading -- confirm against the original script.
ai_list = []  # matching comments, kept exactly as read (trailing newline included)
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']  # substrings that mark a comment as AI-related

# A context manager guarantees the input handle is closed; the original
# opened the file and never closed it.
with open('爬取的总弹幕.txt', encoding='utf-8') as find:
    for aifind in find:
        # Keep the line when it contains at least one keyword.
        if any(keyword in aifind for keyword in keywords):
            print(aifind)
            ai_list.append(aifind)
print(ai_list)

# Open each output once instead of once per comment. Skip the step entirely
# when nothing matched so that, as before, no empty file is created.
# NOTE: mode='a' appends, so re-running the script adds the same comments
# again. NOTE: 'AI弹幕.xls' receives plain UTF-8 text, not an Excel workbook.
if ai_list:
    for output_path in ('AI弹幕.txt', 'AI弹幕.xls'):
        with open(output_path, mode='a', encoding='utf-8') as f:
            f.writelines(ai_list)
from collections import Counter
# Substrings that mark a bullet comment (danmaku) as AI-related.
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']

# Scan the scraped comment file line by line and keep every line that
# contains at least one keyword. Each kept line has its leading and trailing
# whitespace (including the newline) stripped.
with open('爬取的总弹幕.txt', encoding='utf-8') as danmaku_file:
    ai_list = [
        line.strip()
        for line in danmaku_file
        if any(keyword in line for keyword in keywords)
    ]
# Tally how many times each filtered comment occurs.
counter = Counter(ai_list)
# Keep the eight most frequent comments as (comment, count) pairs.
top_8_comments = counter.most_common(8)
# Report the ranking on stdout, one "comment: count" line per entry.
print("前八的弹幕:")
for text, occurrences in top_8_comments:
    print(text, occurrences, sep=": ")
# Save every filtered AI comment to 'AI弹幕.txt', one per line; mode='w'
# replaces any previous contents of the file.
with open('AI弹幕.txt', mode='w', encoding='utf-8') as all_out:
    all_out.writelines(line + '\n' for line in ai_list)
# Save the top-eight ranking (header line, then "comment: count" rows) to
# '前八弹幕.txt'.
with open('前八弹幕.txt', mode='w', encoding='utf-8') as top_out:
    top_out.write("Top 8 'AI' related comments:\n")
    top_out.writelines(
        f"{text}: {occurrences}\n" for text, occurrences in top_8_comments
    )
# Writing a real Excel workbook is done through pandas.
import pandas as pd

# Build a two-column table from the (comment, count) pairs and export it to
# '前八弹幕.xlsx' without the DataFrame index column.
excel_columns = ['Comment', 'Count']
excel_path = '前八弹幕.xlsx'
df = pd.DataFrame(data=top_8_comments, columns=excel_columns)
df.to_excel(excel_path, index=False)