From 7ab6a698cb695142b8519d194aab3c3bcf75e665 Mon Sep 17 00:00:00 2001
From: pxf746fmv <yyansheng144@qq.com>
Date: Tue, 17 Sep 2024 23:22:23 +0800
Subject: [PATCH] ADD file via upload

---
 ...测关键词并将其写入Excel的程序.py | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 软工个人作业——检测关键词并将其写入Excel的程序.py

diff --git a/软工个人作业——检测关键词并将其写入Excel的程序.py b/软工个人作业——检测关键词并将其写入Excel的程序.py
new file mode 100644
index 0000000..9d69e9b
--- /dev/null
+++ b/软工个人作业——检测关键词并将其写入Excel的程序.py
@@ -0,0 +1,50 @@
+# Software engineering individual assignment: detect keywords and write the results to Excel
+import collections
+import re
+import pandas as pd
+
+# Detect keywords, then report each matching sentence with its rank and count.
+
+# Keywords to detect ('AI' and 'ai' coincide once lower-cased)
+keywords = ['AI', "人工智能", 'ai']
+
+# Read the text file into a list of lines
+with open('all_danmaku.txt', 'r', encoding='utf-8') as file:
+    lines = file.readlines()
+
+# Counter keyed by the stripped line text
+counter = collections.Counter()
+
+# Standalone "ai": not glued to other Latin letters ("said" is rejected) but
+# accepted at line edges or beside CJK text, spaces and punctuation. Requiring
+# a CJK character on BOTH sides would miss lines that are just "AI" or start/end with it.
+standalone_ai = re.compile(r'(?<![a-z])ai(?![a-z])')
+
+# Walk every line, test the keywords and count matching lines
+for line in lines:
+    line_lower = line.lower()  # lower-case for case-insensitive matching
+    for keyword in keywords:
+        if keyword.lower() == 'ai':
+            matched = standalone_ai.search(line_lower) is not None
+        else:
+            matched = keyword.lower() in line_lower
+        if matched:
+            counter[line.strip()] += 1
+            break  # count each line at most once
+
+# Take the twenty most frequent sentences
+most_common_lines = counter.most_common(20)
+
+# Build a DataFrame from the (sentence, count) pairs
+df = pd.DataFrame(most_common_lines, columns=['句子', '出现次数'])
+
+# Add a rank column (dense ranking on the count, highest count first)
+df['排名'] = df['出现次数'].rank(method='dense', ascending=False).astype(int)
+
+# Reorder the columns
+df = df[['排名', '句子', '出现次数']]
+
+# Write the DataFrame to an Excel file
+df.to_excel('关键词检测结果.xlsx', index=False)
+
+print("Excel文件已生成：关键词检测结果.xlsx")