import re
import pandas as pd

def read_file(file_path):
    """Return the entire contents of a UTF-8 text file as one string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents, without a leading byte-order mark.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading BOM (written by some Windows editors), which would otherwise
    # stay attached to the first sentence as an invisible U+FEFF character.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return file.read()

def split(text):
    """Split text into sentences on ASCII and full-width terminators.

    Boundaries are '.', '!', '?', '。', '！' and '？'. A '.' that sits
    directly between two digits is treated as a decimal point (as in
    "GPT-3.5" or "3.14") and does not end a sentence.

    Args:
        text: The text to split.

    Returns:
        A list of sentences with surrounding whitespace removed; fragments
        that are empty after stripping are dropped. The terminator
        characters themselves are not included.
    """
    # A dot counts as a boundary when it is NOT preceded by a digit, or NOT
    # followed by a digit -- i.e. everything except the digit.digit case.
    boundary = r'[!?。！？]|(?<!\d)\.|\.(?!\d)'
    stripped = (piece.strip() for piece in re.split(boundary, text))
    return [piece for piece in stripped if piece]

def find(sentences, keyword, top_n=8):
    """Return the sentences that contain a keyword most often.

    Matching is case-insensitive (both sides are lower-cased) and counts
    non-overlapping occurrences of the keyword in each sentence.

    Args:
        sentences: Iterable of sentence strings to search.
        keyword: The substring to look for.
        top_n: Maximum number of sentences to return. ``None`` means no
            limit; zero or a negative value yields an empty list.

    Returns:
        Up to ``top_n`` sentences (original casing) ordered by descending
        occurrence count. Sentences with equal counts keep their input
        order. Returns an empty list when nothing matches.
    """
    needle = keyword.lower()
    # str.count('') returns len(s) + 1, so an empty keyword would "match"
    # every sentence; treat it as matching nothing instead.
    if not needle:
        return []
    # A negative slice bound would drop items from the end rather than
    # limit the result, so handle non-positive limits explicitly.
    if top_n is not None and top_n <= 0:
        return []

    scored = [(sentence, sentence.lower().count(needle)) for sentence in sentences]
    hits = [pair for pair in scored if pair[1] > 0]

    # list.sort is stable, so ties stay in their original order.
    hits.sort(key=lambda pair: pair[1], reverse=True)
    return [sentence for sentence, _ in hits[:top_n]]

def save(file_path, result_dict):
    """Write each keyword's sentences to its own sheet of an Excel workbook.

    Args:
        file_path: Destination path of the .xlsx file.
        result_dict: Mapping of keyword -> list of sentences. Keywords whose
            list is empty get no sheet.

    Each sheet has a single column headed by the keyword. The sheet name is
    the keyword with characters Excel forbids in sheet names replaced by
    '_', truncated to 30 characters.
    """
    non_empty = {
        keyword: sentences
        for keyword, sentences in result_dict.items()
        if sentences
    }

    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        if not non_empty:
            # A workbook without any sheet cannot be saved (openpyxl raises
            # on close), so emit one empty sheet to keep the output valid.
            pd.DataFrame().to_excel(writer, sheet_name='Sheet1', index=False)
            return

        for keyword, sentences in non_empty.items():
            # Excel rejects [ ] : * ? / \ in sheet names.
            sheet_name = re.sub(r'[\[\]:*?/\\]', '_', keyword)[:30]
            df = pd.DataFrame(sentences, columns=[f'{keyword}'])
            df.to_excel(writer, sheet_name=sheet_name, index=False)

def main():
    """Extract the top keyword-bearing sentences from '3.txt' into 'results.xlsx'.

    Reads the input text, splits it into sentences, and for every keyword
    collects the eight sentences in which it occurs most often. The results
    are then written to an Excel workbook via ``save``.
    """
    input_file = '3.txt'
    output_file = 'results.xlsx'

    # Keywords to search for.
    keywords = [
        'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化', '算法', '数据科学',
        '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
        '大数据', '预测分析', '机器视觉', '自动驾驶',
        '智能推荐', '计算机科学', '人工智能应用',
        '数据分析', '智能化', '情感计算', 'ai', '字幕', '推荐', 'gpt', '机器', '直播', '机翻', '实时', '技术'
    ]

    # Read the text and break it into sentences.
    text = read_file(input_file)
    sentences = split(text)
    result_dict = {}

    # The search is case-insensitive, so keywords differing only in case
    # (e.g. 'AI' and 'ai') yield identical results, and Excel sheet names
    # are case-insensitive as well. Keep only the first spelling of each.
    seen = set()
    for keyword in keywords:
        normalized = keyword.lower()
        if normalized in seen:
            continue
        seen.add(normalized)
        # Top eight sentences by occurrence count for this keyword.
        result_dict[keyword] = find(sentences, keyword, top_n=8)

    # Persist the results to the Excel file.
    save(output_file, result_dict)

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()