ADD file via upload

10 months ago · 76527cb1aa
parent 475119e39e
commit 76527cb1aa
1 changed files with 48 additions and 0 deletions
--- a/软工个人作业——检测关键词并将其写入Excel的程序.py
+++ b/软工个人作业——检测关键词并将其写入Excel的程序.py
@ -0,0 +1,48 @@
 #软工个人作业——检测并将写入Excel的程序
 import collections
 import re
 import pandas as pd
 # Keywords to detect. Matching is case-insensitive, so 'AI' and 'ai' are
 # equivalent; both are kept so the list reads the same as before.
 keywords = ['AI', "人工智能", 'ai']

 # A standalone "ai": not glued to other ASCII letters on either side, so words
 # such as "again" or "paid" are rejected. Lookarounds are used instead of
 # requiring a CJK character on both sides, which missed "ai" at the start or
 # end of a line and next to spaces, digits or punctuation. `\b` is not usable
 # here because CJK characters count as word characters.
 # The pattern is applied to lower-cased text, hence only [a-z].
 _STANDALONE_AI = re.compile(r'(?<![a-z])ai(?![a-z])')


 def _line_matches(line_lower, lowered_keywords):
     """Return True if the lower-cased line contains any lower-cased keyword."""
     for keyword in lowered_keywords:
         if keyword == 'ai':
             # "ai" must stand alone; a plain substring test would hit "again".
             if _STANDALONE_AI.search(line_lower):
                 return True
         elif keyword in line_lower:
             return True
     return False


 def count_keyword_lines(lines, keyword_list=None):
     """Count how often each keyword-bearing line occurs.

     Args:
         lines: Iterable of text lines (an open text file works).
         keyword_list: Keywords to look for; defaults to the module-level
             ``keywords``. Matching is case-insensitive.

     Returns:
         A ``collections.Counter`` mapping each matching line (stripped of
         surrounding whitespace) to its number of occurrences. A line is
         counted once per occurrence even if it contains several keywords.
     """
     if keyword_list is None:
         keyword_list = keywords
     # Lower-case the keywords once instead of once per line.
     lowered_keywords = [keyword.lower() for keyword in keyword_list]

     counter = collections.Counter()
     for line in lines:
         if _line_matches(line.lower(), lowered_keywords):
             counter[line.strip()] += 1
     return counter


 def main(input_path='all_danmaku.txt', output_path='排名表格.xlsx', top_n=8):
     """Rank the most frequent keyword lines and export them to Excel.

     Args:
         input_path: UTF-8 text file with one sentence per line.
         output_path: Excel file to write the ranking to.
         top_n: Number of most frequent lines to report.

     Raises:
         FileNotFoundError: If ``input_path`` does not exist.
     """
     # Stream the file line by line rather than loading it all with readlines().
     with open(input_path, 'r', encoding='utf-8') as file:
         counter = count_keyword_lines(file)

     # Most frequent matching sentences, highest count first.
     most_common_lines = counter.most_common(top_n)

     for line, count in most_common_lines:
         print(f"{line}: {count} 次")

     # Save the ranking as a two-column sheet without the index column.
     df = pd.DataFrame(most_common_lines, columns=['句子', '出现次数'])
     df.to_excel(output_path, index=False)
     print(f"排名表格已保存到{output_path}")


 if __name__ == "__main__":
     main()