生成Excel

2 years ago · 7acc347f8c
parent eafe8d738e
commit 7acc347f8c
1 changed file with 83 additions and 0 deletions
--- a/excle.py
+++ b/excle.py
@@ -0,0 +1,83 @@
+import re
+import pandas as pd
+
+# Read the whole input text file.
+def read_file(file_path):
+    """Return the full contents of the UTF-8 text file at ``file_path``.
+
+    Args:
+        file_path: Path of the file to read.
+
+    Returns:
+        The decoded file contents as a single string.
+    """
+    with open(file_path, mode='r', encoding='utf-8') as handle:
+        contents = handle.read()
+    return contents
+# Split the text into sentences.
+def split_into_sentences(text):
+    """Split ``text`` into trimmed, non-empty sentences.
+
+    The text is cut at every ASCII or full-width sentence terminator
+    (``.``, ``!``, ``?``, ``。``, ``！``, ``？``); the terminators themselves
+    are dropped.
+
+    Args:
+        text: The text to split.
+
+    Returns:
+        A list of the fragments between terminators, each stripped of
+        surrounding whitespace, with empty fragments omitted.
+    """
+    fragments = re.split(r'[.!?。！？]', text)
+    # Strip each fragment once, then keep only the ones that still have content
+    trimmed = (fragment.strip() for fragment in fragments)
+    return [sentence for sentence in trimmed if sentence]
+
+# Find the sentences that contain a keyword and rank them by occurrence count.
+def find_top_sentences_by_keyword(sentences, keyword, top_n=8):
+    """Return the sentences in which ``keyword`` occurs most often.
+
+    Matching is case-insensitive and counts non-overlapping occurrences.
+    Sentences that do not contain the keyword are ignored.
+
+    Args:
+        sentences: Iterable of sentence strings to search.
+        keyword: The keyword to look for.
+        top_n: Maximum number of sentences to return; ``None`` returns every
+            matching sentence.
+
+    Returns:
+        Up to ``top_n`` sentences ordered by descending occurrence count.
+        Sentences with equal counts keep their input order. An empty keyword
+        or a non-positive ``top_n`` yields an empty list.
+    """
+    needle = keyword.lower()  # lower-cased once instead of once per sentence
+
+    # str.count('') returns len(s) + 1, so an empty keyword would "match"
+    # every sentence and rank them by length; treat it as no match instead.
+    if not needle:
+        return []
+    # A negative slice bound would drop items from the end rather than
+    # limiting the result, so handle non-positive limits explicitly.
+    if top_n is not None and top_n <= 0:
+        return []
+
+    scored = ((sentence, sentence.lower().count(needle)) for sentence in sentences)
+    matches = [pair for pair in scored if pair[1] > 0]
+
+    # list.sort is stable, also with reverse=True, so ties keep input order
+    matches.sort(key=lambda pair: pair[1], reverse=True)
+    return [sentence for sentence, _ in matches[:top_n]]
+
+
+# Save the results to an Excel file.
+def save_to_excel(file_path, result_dict):
+    """Write one worksheet per keyword to the workbook at ``file_path``.
+
+    Each worksheet holds a single column whose header is the keyword
+    followed by a space and whose rows are that keyword's sentences.
+
+    Args:
+        file_path: Destination path of the Excel workbook.
+        result_dict: Mapping of keyword to the list of sentences for it.
+    """
+    # Using the writer as a context manager closes the workbook even when
+    # writing one of the sheets raises, so the file handle is never leaked.
+    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
+        for keyword, sentences in result_dict.items():
+            df = pd.DataFrame(sentences, columns=[f'{keyword} '])
+            # The sheet name is the keyword truncated to 30 characters
+            df.to_excel(writer, sheet_name=keyword[:30], index=False)
+
+
+
+def main():
+    """Extract the top sentences for each keyword from '3.txt' into 'results.xlsx'.
+
+    Reads the input text, splits it into sentences, collects for every
+    keyword the eight sentences in which it occurs most often, and writes
+    the result to an Excel workbook.
+    """
+    # Keywords to search for
+    keywords = [
+        'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化', '算法', '数据科学',
+        '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
+        '大数据', '预测分析', '机器视觉', '自动驾驶',
+        '智能推荐', '计算机科学', '人工智能应用',
+        '数据分析', '智能化', '情感计算', 'ai', '字幕', '推荐', 'gpt', '机器', '直播', '机翻', '实时', '技术'
+    ]
+    input_file = '3.txt'
+    output_file = 'results.xlsx'
+
+    # Read the text and split it into sentences
+    sentences = split_into_sentences(read_file(input_file))
+
+    # For every keyword, keep the eight sentences with the most occurrences
+    result_dict = {
+        keyword: find_top_sentences_by_keyword(sentences, keyword, top_n=8)
+        for keyword in keywords
+    }
+
+    # Save the results to the Excel file
+    save_to_excel(output_file, result_dict)
+
+# Run the keyword extraction only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+import cProfile
+import pstats
+import 输出
+
+# Profile a single run of the main function of the `输出` module.
+profiler = cProfile.Profile()
+profiler.enable()
+输出.main()
+profiler.disable()
+
+# Write the profiling report, sorted by cumulative time, to a text file.
+# UTF-8 is requested explicitly: the report lists source file names, and
+# non-ASCII names (such as the `输出` module profiled here) may not be
+# representable in the platform's default encoding.
+with open("profile_results1.txt", "w", encoding="utf-8") as f:
+    pstats.Stats(profiler, stream=f).sort_stats('cumulative').print_stats()