"""Find the sentences that mention each keyword most often and save them to Excel.

The script reads a UTF-8 text file, splits it into sentences, ranks the
sentences by how many times each keyword occurs in them (case-insensitively)
and writes one worksheet per keyword to an ``.xlsx`` workbook.
"""

import re

import pandas as pd

# Sentence terminators: ASCII '.', '!', '?' plus the CJK full stop and the
# full-width exclamation/question marks used in Chinese text.
_SENTENCE_END = re.compile(r'[.!?。！？]')

# Characters that are not allowed in an Excel worksheet title.
_INVALID_SHEET_CHARS = re.compile(r'[\[\]:*?/\\]')

# Maximum worksheet title length used by this script (Excel's hard limit is 31).
_MAX_SHEET_NAME = 30


def read_file(file_path):
    """Return the whole content of the UTF-8 text file at *file_path*."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


def split(text):
    """Split *text* into sentences.

    The text is cut at every sentence terminator (see ``_SENTENCE_END``);
    the terminators themselves are dropped, surrounding whitespace is
    stripped and empty fragments are discarded.

    Returns:
        A list of non-empty sentence strings in their original order.
    """
    fragments = (fragment.strip() for fragment in _SENTENCE_END.split(text))
    return [fragment for fragment in fragments if fragment]


def find(sentences, keyword, top_n=8):
    """Return the sentences in which *keyword* occurs most often.

    Matching is case-insensitive and counts non-overlapping occurrences.

    Args:
        sentences: Iterable of sentence strings.
        keyword: Substring to look for. An empty keyword matches nothing.
        top_n: Maximum number of sentences to return. ``None`` returns every
            matching sentence; zero or a negative number returns none.

    Returns:
        A list of at most *top_n* sentences, ordered by descending occurrence
        count. Sentences with the same count keep their input order.
    """
    keyword_lower = keyword.lower()
    # "".count("") is len + 1, which would make every sentence "match".
    if not keyword_lower:
        return []
    # A negative slice bound would silently drop items from the end instead.
    if top_n is not None and top_n <= 0:
        return []

    keyword_counts = []
    for sentence in sentences:
        count = sentence.lower().count(keyword_lower)
        if count > 0:
            keyword_counts.append((sentence, count))

    # list.sort is stable, also with reverse=True, so ties keep input order.
    keyword_counts.sort(key=lambda item: item[1], reverse=True)
    return [sentence for sentence, _ in keyword_counts[:top_n]]


def _sheet_name(keyword, used):
    """Return a valid worksheet title for *keyword* that is not yet in *used*.

    Forbidden characters are replaced by ``_``, the title is truncated to
    ``_MAX_SHEET_NAME`` characters and, on a (case-insensitive) clash, a
    numeric suffix such as ``_2`` is appended. *used* is a set of lower-cased
    titles and is updated in place.
    """
    base = _INVALID_SHEET_CHARS.sub('_', str(keyword))[:_MAX_SHEET_NAME] or 'Sheet'
    name = base
    suffix = 2
    while name.lower() in used:
        tail = f'_{suffix}'
        # Trim the base so that the suffixed title still fits the limit.
        name = base[:_MAX_SHEET_NAME - len(tail)] + tail
        suffix += 1
    used.add(name.lower())
    return name


def save(file_path, result_dict):
    """Write one worksheet per keyword to the Excel workbook *file_path*.

    Args:
        file_path: Destination ``.xlsx`` path.
        result_dict: Mapping of keyword -> list of sentences. Keywords with
            no sentences are skipped.

    If no keyword has any sentence, nothing is written at all: a workbook
    without sheets cannot be saved, so opening the writer would only fail
    and leave a broken file behind.
    """
    non_empty = {
        keyword: sentences
        for keyword, sentences in result_dict.items()
        if sentences
    }
    if not non_empty:
        return

    used_names = set()
    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        for keyword, sentences in non_empty.items():
            df = pd.DataFrame(sentences, columns=[f'{keyword}'])
            df.to_excel(
                writer,
                sheet_name=_sheet_name(keyword, used_names),
                index=False,
            )


def main():
    """Read the input text, rank sentences per keyword and save the result."""
    input_file = '3.txt'
    output_file = 'results.xlsx'

    # Keywords to search for.
    keywords = [
        'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化', '算法', '数据科学',
        '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
        '大数据', '预测分析', '机器视觉', '自动驾驶',
        '智能推荐', '计算机科学', '人工智能应用',
        '数据分析', '智能化', '情感计算', 'ai', '字幕', '推荐', 'gpt', '机器', '直播', '机翻', '实时', '技术'
    ]

    # Read the text and split it into sentences.
    text = read_file(input_file)
    sentences = split(text)

    # For every keyword keep the eight sentences with the most occurrences.
    result_dict = {
        keyword: find(sentences, keyword, top_n=8)
        for keyword in keywords
    }

    # Save the result to the Excel file.
    save(output_file, result_dict)


if __name__ == "__main__":
    main()