parent
f84f95ec88
commit
2a7cee1ecd
@ -0,0 +1,48 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
# Input text file to scan and the Excel report to produce.
# NOTE(review): presumably one danmaku comment per line -- confirm with the
# producer of total300.txt.
test_path = 'd:\\学习\\软件工程\\swork\\res\\total300.txt'
savepath = "d:\\学习\\软件工程\\swork\\res\\AI应用弹幕统计结果.xlsx"

keywords = ['VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型', 'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟','ai音效','ai视频','AI修复']


def _compile_matchers(keywords):
    """Return a ``{keyword: compiled regex}`` mapping for *keywords*.

    Every keyword is matched case-insensitively. A keyword made only of
    ASCII letters must additionally not be adjacent to another ASCII letter,
    so that e.g. ``ai`` does not match inside ``main``. Keywords containing
    digits or CJK characters (``3D``, ``ai音效``) are matched as plain
    substrings.
    """
    matchers = {}
    for keyword in keywords:
        pattern = re.escape(keyword)
        if re.match(r'^[a-zA-Z]+$', keyword):
            pattern = r'(?<![a-zA-Z])' + pattern + r'(?![a-zA-Z])'
        matchers[keyword] = re.compile(pattern, re.IGNORECASE)
    return matchers


def count_keyword_lines(lines, keywords):
    """Count, for each keyword, the number of lines that mention it.

    Args:
        lines: iterable of strings (an open text file works and is streamed).
        keywords: iterable of keyword strings; repeated keywords are
            counted once.

    Returns:
        dict mapping each keyword to the number of lines containing it.
        A line contributes at most 1 to a keyword no matter how many times
        the keyword occurs in it.
    """
    # Compile once up front instead of rebuilding the pattern per line.
    matchers = _compile_matchers(keywords)
    counts = {keyword: 0 for keyword in matchers}
    for line in lines:
        for keyword, matcher in matchers.items():
            if matcher.search(line):
                counts[keyword] += 1
    return counts


def write_report(counts, path):
    """Write the non-zero keyword counts to an Excel workbook at *path*.

    Rows are ordered by descending count; keywords that never occurred are
    omitted. The sheet has a header row followed by ``[keyword, count]``
    rows.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = "AI应用弹幕统计"
    # Header row.
    ws.append(['AI应用', '出现数量'])
    for keyword, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
        if count > 0:
            ws.append([keyword, count])
    wb.save(path)


# Per-keyword line counts; stays all-zero when the input file is missing.
keyword_count = {keyword: 0 for keyword in keywords}

# Only produce a report when the input file is present.
if os.path.exists(test_path):
    with open(test_path, 'r', encoding='utf-8') as file:
        keyword_count = count_keyword_lines(file, keywords)
    write_report(keyword_count, savepath)
else:
    print("文件不存在\n")
|
Loading…
Reference in new issue