import pandas as pd import re from collections import defaultdict, Counter def comments_analysis(search_word, keywords_list): print("正在处理弹幕数据...") # 读取弹幕文件 def read_comments(file_path): with open(file_path, 'r', encoding='utf-8') as f: comments = f.readlines() return [comment.strip() for comment in comments] # 统计与 AI 技术应用相关的弹幕数量,并记录匹配的弹幕 def count_comments(comments, keywords): count = Counter() matched_comments = defaultdict(list) # 存储每个关键词对应的匹配弹幕 # 提前将关键词转化为小写 keywords_lower = {keyword.lower() for keyword in keywords} for comment in comments: comment_lower = comment.lower() # 将弹幕转换为小写 for keyword in keywords_lower: if keyword == 'ai': # 使用正则表达式判断 "AI" 前后是否有英文字母 if re.search(r'(?