102201401/2024巴黎奥运会弹幕前八.py

import pandas as pd
from openpyxl import Workbook
import re

def normalize_bullet_comment(commet):
    """Collapse laughter-style bullet comments into one canonical form.

    Args:
        commet: The bullet-comment text to normalize.

    Returns:
        '哈哈哈' when the text contains two or more consecutive '哈'
        characters anywhere; otherwise the text unchanged.
    """
    contains_laughter = re.search(r'哈{2,}', commet) is not None
    return '哈哈哈' if contains_laughter else commet

def is_airelated(comment):
    """Return whether a bullet comment mentions an AI-related keyword.

    Matching is a case-insensitive substring test, so 'AI', 'Ai' and 'ai'
    are all recognised.

    Args:
        comment: The bullet-comment text to inspect.

    Returns:
        True when at least one keyword occurs in the comment, else False.
    """
    ai_keywords = ('ai', '科技', '机器', '个性化', '人机')
    # Keywords are stored in lowercase; lowercase the comment as well so
    # that the common uppercase spelling 'AI' is not silently missed.
    lowered = comment.lower()
    return any(keyword in lowered for keyword in ai_keywords)

# Read the bullet comments (one per line), strip surrounding whitespace and
# normalize each one while streaming through the file.
with open('巴黎弹幕.txt', 'r', encoding='utf-8') as file:
    bullet_comments = [
        normalize_bullet_comment(raw_line.strip()) for raw_line in file
    ]

# Keep only the comments that contain an AI-related keyword.
ai_related_comments = list(filter(is_airelated, bullet_comments))

# Count identical comments and keep the eight most frequent ones.
count_series = pd.Series(ai_related_comments).value_counts()
top_8 = count_series.head(8)

# Build a two-column table (comment text, occurrence count) and export it
# to an Excel workbook.
df = pd.DataFrame(
    {
        '弹幕': top_8.index,
        '数量': top_8.values,
    }
)

df.to_excel(
    '弹幕_aiTop8.xlsx',
    index=False,
    sheet_name='Top 8 Bullet Comments',
)
ADD file via upload 3 months ago			`import pandas as pd`
			`from openpyxl import Workbook`
			`import re`

			`def normalize_bullet_comment(commet):`
			`#归一化类似的弹幕`
			`if re.search(r'哈{2,}',commet):`
			`return '哈哈哈'`
			`return commet`

			`def is_airelated(comment):`
			`ai_keywords = ['ai','科技','机器','个性化','人机']`
			`return any(keyword in comment for keyword in ai_keywords)`

			`#读取弹幕`
			`with open('巴黎弹幕.txt','r',encoding='utf-8') as file:`
			`bullet_comments = file.readlines()`

			`#处理弹幕`
			`bullet_comments = [normalize_bullet_comment(line.strip()) for line in bullet_comments]`

			`#统计包含关键词的弹幕`
			`ai_related_comments = [comment for comment in bullet_comments if is_airelated(comment)]`
			`count_series = pd.Series(ai_related_comments).value_counts()`
			`top_8 = count_series.head(8)`

			`#创建DataFrame并保存到Excel`
			`df = pd.DataFrame({`
			`'弹幕':top_8.index,`
			`'数量':top_8.values`
			`})`

			`df.to_excel('弹幕_aiTop8.xlsx',index=False,sheet_name='Top 8 Bullet Comments')`