|
|
@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 读取文本
|
|
|
|
|
|
|
|
def read_file(file_path):
    """Return the full contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The whole file as a single string.
    """
    # open() defaults to text read mode, so only the encoding is spelled out.
    with open(file_path, encoding='utf-8') as source:
        contents = source.read()
    return contents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 将文本拆分为句子
|
|
|
|
|
|
|
|
def split_into_sentences(text):
    """Split text into sentences on ASCII and CJK sentence-ending punctuation.

    The text is cut at every ``.``, ``!``, ``?``, the ideographic full stop
    (U+3002) and the full-width exclamation and question marks (U+FF01,
    U+FF1F). The delimiters themselves are dropped.

    Args:
        text: The text to split.

    Returns:
        A list of the non-empty pieces in their original order, each with
        leading and trailing whitespace removed.
    """
    # The full-width marks are written as escapes so they cannot be silently
    # normalised to ASCII "!" / "?", which would leave Chinese exclamations
    # and questions glued to the following sentence.
    pieces = re.split(r'[.!?\u3002\uff01\uff1f]', text)
    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 查找包含关键词的句子并统计关键词出现次数
|
|
|
|
|
|
|
|
def find_top_sentences_by_keyword(sentences, keyword, top_n=8):
    """Return the sentences that mention a keyword most often.

    Matching is case-insensitive: both the sentence and the keyword are
    lower-cased, and non-overlapping substring occurrences are counted.

    Args:
        sentences: Iterable of sentence strings to search.
        keyword: Substring to look for.
        top_n: Maximum number of sentences to return.

    Returns:
        Up to ``top_n`` sentences that contain the keyword at least once,
        ordered by descending occurrence count. Sentences with equal counts
        keep their input order. An empty keyword yields an empty list.
    """
    # str.count('') returns len(s) + 1, so an empty keyword would "match"
    # every sentence and rank them by length; treat it as matching nothing.
    if not keyword:
        return []

    needle = keyword.lower()  # loop-invariant, so lower-case it only once
    scored = [
        (sentence, sentence.lower().count(needle)) for sentence in sentences
    ]
    matches = [pair for pair in scored if pair[1] > 0]

    # list.sort is stable even with reverse=True, so ties stay in input order.
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return [sentence for sentence, _ in matches[:top_n]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 将结果保存到TXT文件中
|
|
|
|
|
|
|
|
def save_to_file(file_path, keyword, sentences):
    """Append one keyword's numbered sentence list to a UTF-8 text file.

    The section consists of a header line naming the keyword, one numbered
    line per sentence (numbering starts at 1) and a trailing blank line.

    Args:
        file_path: Path of the file to append to.
        keyword: Keyword written in the section header.
        sentences: Iterable of sentences to list under the header.
    """
    with open(file_path, 'a', encoding='utf-8') as out:
        out.write(f"关键词: {keyword}\n")
        out.writelines(
            f"{number}. {text}\n" for number, text in enumerate(sentences, 1)
        )
        # Blank line separating this keyword's results from the next one's.
        out.write("\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 主函数
|
|
|
|
|
|
|
|
def main(input_file='3.txt', output_file='output.txt', keywords=None, top_n=8):
    """Extract the top sentences for each keyword and write them to a file.

    Reads ``input_file``, splits it into sentences, and for every keyword
    appends the sentences with the most occurrences of that keyword to
    ``output_file``. Called without arguments it uses the built-in paths,
    keyword list and limit.

    Args:
        input_file: Path of the UTF-8 text file to analyse.
        output_file: Path of the text file the results are appended to.
        keywords: Keywords to search for; ``None`` selects the built-in list.
        top_n: Maximum number of sentences kept per keyword.
    """
    if keywords is None:
        # NOTE: matching in find_top_sentences_by_keyword is case-insensitive,
        # so 'AI' and 'ai' below produce two sections with identical sentences.
        keywords = [
            'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化', '算法', '数据科学',
            '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
            '大数据', '预测分析', '机器视觉', '自动驾驶',
            '智能推荐', '计算机科学', '人工智能应用',
            '数据分析', '智能化', '情感计算', 'ai', '字幕', '推荐', 'gpt', '机器', '直播', '机翻', '实时', '技术',
        ]

    # Read the text and split it into sentences once, up front.
    text = read_file(input_file)
    sentences = split_into_sentences(text)

    # save_to_file opens the output in append mode, so each keyword adds a
    # section and repeated runs accumulate in the same file.
    for keyword in keywords:
        top_sentences = find_top_sentences_by_keyword(sentences, keyword, top_n=top_n)
        save_to_file(output_file, keyword, top_sentences)
|
|
|
|
|
|
|
|
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|