parent
1bc34bff47
commit
685ee40a1e
@ -0,0 +1,35 @@
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook
|
||||
import re
|
||||
|
||||
def normalize_bullet_comment(commet):
    """Collapse laughter-style bullet comments into one canonical form.

    Args:
        commet: The bullet-comment text to normalise.

    Returns:
        The literal '哈哈哈' when the text contains a run of two or more
        consecutive '哈' characters anywhere; otherwise the text unchanged.
    """
    # A single search is enough: any run of 2+ '哈' marks the whole comment
    # as laughter, and the entire comment is replaced, not just the run.
    laugh_run = re.search(r'哈{2,}', commet)
    return commet if laugh_run is None else '哈哈哈'
|
||||
|
||||
def is_airelated(comment):
    """Return True when *comment* mentions an AI-related keyword.

    Matching is a case-insensitive substring test, so 'AI', 'ai', 'Ai'
    and 'aI' are all recognised, alongside the Chinese keywords.

    Args:
        comment: The bullet-comment text to inspect.

    Returns:
        True if any keyword occurs anywhere in the comment, else False.
    """
    # Keywords are stored lowercase; the comment is lowercased once so
    # mixed-case spellings of "AI" are not missed.
    # NOTE: this is a plain substring test, so 'ai' also matches inside
    # longer Latin words that happen to contain those two letters.
    ai_keywords = ('ai', '神经网络', '机器学习', '人工智能', '深度学习')
    lowered = comment.lower()
    return any(keyword in lowered for keyword in ai_keywords)
|
||||
|
||||
# Read the raw bullet comments, one entry per line of the input file.
with open('弹幕.txt', 'r', encoding='utf-8') as file:
    bullet_comments = list(file)

# Strip surrounding whitespace, then collapse laughter variants.
bullet_comments = list(
    map(normalize_bullet_comment, map(str.strip, bullet_comments))
)

# Keep only the AI-related comments and count how often each one occurs.
ai_related_comments = list(filter(is_airelated, bullet_comments))
count_series = pd.Series(ai_related_comments).value_counts()
top_8 = count_series.head(8)

# Build a two-column table (comment text, occurrence count) and export it
# to an Excel workbook.
df = pd.DataFrame({'弹幕': top_8.index, '数量': top_8.values})
df.to_excel('aiTop8.xlsx', index=False, sheet_name='Top 8 Bullet Comments')
|
||||
|
||||
|
Loading…
Reference in new issue