1022011512/软工个人作业——检测关键词并将其写入Excel的程序.py

# Software engineering individual assignment: detect keywords and write the results to an Excel file.
import collections
import re
import pandas as pd


# Keywords to look for. Matching is case-insensitive, so 'AI' and 'ai'
# are equivalent; both are kept so the list reads the same as before.
keywords = ['AI', "人工智能", 'ai']

# "ai" counts only when it is not glued to other ASCII letters, so that
# English words such as "said" or "maintain" are not mistaken for the keyword.
# Lookarounds (rather than requiring a CJK character on each side) also accept
# "AI" at the start or end of a line and next to spaces, digits or punctuation.
# The pattern is applied to lower-cased text, hence only [a-z].
_STANDALONE_AI = re.compile(r'(?<![a-z])ai(?![a-z])')


def line_matches(line: str, keyword_list=None) -> bool:
    """Return True if *line* contains at least one keyword.

    Args:
        line: One line of text (a trailing newline is harmless).
        keyword_list: Keywords to test; defaults to the module-level
            ``keywords``. The keyword "ai" (any case) is matched as a
            standalone token; every other keyword is matched as a
            case-insensitive substring.

    Returns:
        True when any keyword is found, otherwise False.
    """
    if keyword_list is None:
        keyword_list = keywords
    lowered_line = line.lower()
    for keyword in keyword_list:
        lowered_keyword = keyword.lower()
        if lowered_keyword == 'ai':
            if _STANDALONE_AI.search(lowered_line):
                return True
        elif lowered_keyword in lowered_line:
            return True
    return False


def count_matching_lines(lines, keyword_list=None) -> collections.Counter:
    """Count how often each keyword-bearing line occurs.

    Args:
        lines: Any iterable of strings (e.g. an open text file).
        keyword_list: Forwarded to :func:`line_matches`.

    Returns:
        A Counter keyed by the whitespace-stripped line text. Each input
        line contributes at most 1, no matter how many keywords it contains.
    """
    tally = collections.Counter()
    for line in lines:
        if line_matches(line, keyword_list):
            tally[line.strip()] += 1
    return tally


def main(input_path='all_danmaku.txt', output_path='排名表格.xlsx', top_n=8):
    """Rank the most frequent keyword-bearing lines and export them to Excel.

    Args:
        input_path: UTF-8 text file with one sentence per line.
        output_path: Destination ``.xlsx`` file.
        top_n: Number of top-ranked sentences to keep.

    Returns:
        The list of ``(sentence, count)`` pairs that was written out.
    """
    # Stream the file line by line instead of loading it all into memory.
    with open(input_path, 'r', encoding='utf-8') as file:
        counter = count_matching_lines(file)

    most_common_lines = counter.most_common(top_n)

    for line, count in most_common_lines:
        print(f"{line}: {count} 次")

    df = pd.DataFrame(most_common_lines, columns=['句子', '出现次数'])
    df.to_excel(output_path, index=False)

    print(f"排名表格已保存到{output_path}")
    return most_common_lines


if __name__ == "__main__":
    main()
ADD file via upload 2 months ago			`#软工个人作业——检测并将写入Excel的程序`
			`import collections`
			`import re`
			`import pandas as pd`



			`# 定义关键词列表`
			`keywords = ['AI', "人工智能", 'ai']`

			`# 读取文本文件`
			`with open('all_danmaku.txt', 'r', encoding='utf-8') as file:`
			`lines = file.readlines()`

			`# 初始化一个Counter对象`
			`counter = collections.Counter()`

			`# 遍历每一行，检测关键词并计数`
			`for line in lines:`
			`line_lower = line.lower() # 将行转换为小写以进行不区分大小写的匹配`
			`for keyword in keywords:`
			`# 使用正则表达式匹配独立的关键词`
			`if keyword.lower() == 'ai':`
			`# 只在中文字符的上下文中匹配独立的“ai”单词`
			`if re.search(r'[\u4e00-\u9fff]ai[\u4e00-\u9fff]', line_lower):`
			`counter[line.strip()] += 1`
			`break # 避免同一行多次计数`
			`else:`
			`if keyword.lower() in line_lower:`
			`counter[line.strip()] += 1`
			`break # 避免同一行多次计数`

			`# 获取出现次数最多的前八个句子`
			`most_common_lines = counter.most_common(8)`

			`# 打印结果`
			`for line, count in most_common_lines:`
			`print(f"{line}: {count} 次")`

			`# 创建一个DataFrame`
			`df = pd.DataFrame(most_common_lines, columns=['句子', '出现次数'])`

			`# 将DataFrame保存到Excel文件`
			`df.to_excel('排名表格.xlsx', index=False)`

			`print("排名表格已保存到排名表格.xlsx")`