"""Pick the sentences that mention each keyword most often and export them to Excel.

The script reads a UTF-8 text file, splits it into sentences, ranks the
sentences per keyword by (case-insensitive) occurrence count and writes one
worksheet per keyword.  When executed as a script the whole run is profiled
with cProfile and the statistics are written to a text file.
"""

import cProfile
import pstats
import re

import pandas as pd

# Sentence terminators: ASCII ". ! ?", the CJK full stop, and the full-width
# "!" (U+FF01) and "?" (U+FF1F).  The full-width marks are written as escapes
# so that width-normalising tools cannot silently fold them into ASCII again.
_SENTENCE_DELIMITERS = re.compile(r'[.!?。\uff01\uff1f]')

# Characters that Excel does not allow in a worksheet name.
_INVALID_SHEET_CHARS = re.compile(r'[\\/*?:\[\]]')

# Keywords to look up.  Matching is case-insensitive, so 'AI' and 'ai' select
# the same sentences; both are kept so the output has the same sheets as before.
KEYWORDS = [
    'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化', '算法', '数据科学',
    '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型', '大数据',
    '预测分析', '机器视觉', '自动驾驶', '智能推荐', '计算机科学', '人工智能应用',
    '数据分析', '智能化', '情感计算', 'ai', '字幕', '推荐', 'gpt', '机器', '直播',
    '机翻', '实时', '技术',
]


def read_file(file_path):
    """Return the full contents of the UTF-8 text file at *file_path*."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


def split_into_sentences(text):
    """Split *text* into sentences.

    The text is cut at every sentence terminator (see ``_SENTENCE_DELIMITERS``).
    Each piece is stripped of surrounding whitespace and empty pieces are
    dropped, so the result never contains blank strings.

    Returns:
        A list of non-empty sentence strings, in document order.
    """
    pieces = (piece.strip() for piece in _SENTENCE_DELIMITERS.split(text))
    return [piece for piece in pieces if piece]


def find_top_sentences_by_keyword(sentences, keyword, top_n=8):
    """Return the sentences in which *keyword* occurs most often.

    Occurrences are counted case-insensitively and without overlap
    (``str.count`` semantics).  Sentences that do not contain the keyword are
    ignored.  Sentences with the same count keep their original relative order.

    Args:
        sentences: Iterable of sentence strings.
        keyword: Substring to count.  An empty keyword yields an empty result.
        top_n: Maximum number of sentences to return; values below 1 yield an
            empty list.

    Returns:
        Up to *top_n* sentences, ordered by descending occurrence count.
    """
    needle = keyword.lower()
    # str.count('') returns len(s) + 1, which would make every sentence "match".
    if not needle:
        return []

    scored = []
    for sentence in sentences:
        hits = sentence.lower().count(needle)
        if hits > 0:
            scored.append((sentence, hits))

    # list.sort is stable, so ties stay in document order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    # Clamp so that a negative top_n does not slice from the end of the list.
    return [sentence for sentence, _ in scored[:max(top_n, 0)]]


def _sheet_name_for(keyword):
    """Derive a worksheet name from *keyword*.

    Characters Excel rejects in sheet names are replaced with ``_`` and the
    name is cut to 30 characters (the same limit the script has always used).
    """
    return _INVALID_SHEET_CHARS.sub('_', keyword)[:30]


def save_to_excel(file_path, result_dict):
    """Write one worksheet per keyword to the workbook at *file_path*.

    Args:
        file_path: Destination ``.xlsx`` path; written with the openpyxl engine.
        result_dict: Mapping of keyword -> list of sentences.  Each list
            becomes a single-column sheet whose header is the keyword followed
            by a space.
    """
    # The context manager closes the writer even if writing a sheet fails.
    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        for keyword, sentences in result_dict.items():
            df = pd.DataFrame(sentences, columns=[f'{keyword} '])
            df.to_excel(writer, sheet_name=_sheet_name_for(keyword), index=False)


def main(input_file='3.txt', output_file='results.xlsx'):
    """Run the pipeline: read the text, rank sentences per keyword, export to Excel.

    Args:
        input_file: Path of the UTF-8 text file to analyse.
        output_file: Path of the Excel workbook to create.
    """
    # Read the text and split it into sentences.
    sentences = split_into_sentences(read_file(input_file))

    # For every keyword keep the eight sentences with the most occurrences.
    result_dict = {
        keyword: find_top_sentences_by_keyword(sentences, keyword, top_n=8)
        for keyword in KEYWORDS
    }

    # Save the results to the Excel file.
    save_to_excel(output_file, result_dict)


def profile_main(stats_path="profile_results1.txt"):
    """Run ``main()`` under cProfile and write the statistics to *stats_path*.

    The statistics are sorted by cumulative time.
    """
    # NOTE(review): the original harness did `import 输出` and called
    # `输出.main()`; presumably that module is this very file, so `main` is
    # called directly here -- confirm.
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        main()
    finally:
        profiler.disable()

    # Write the profiling results to a text file.  UTF-8 is used because the
    # report contains file paths, which may include non-ASCII characters.
    with open(stats_path, "w", encoding="utf-8") as f:
        ps = pstats.Stats(profiler, stream=f)
        ps.sort_stats('cumulative')
        ps.print_stats()


if __name__ == "__main__":
    # A single profiled run produces both the workbook and the profile report;
    # nothing runs when the module is merely imported.
    profile_main()