You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
2.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import re
import os
import re
from openpyxl import Workbook
def writeintxt(words, outpath):
    """Write every string in *words* to *outpath* as UTF-8 text.

    The target file is created, or truncated if it already exists. The
    strings are written back to back with nothing inserted between them,
    so callers must supply their own line endings.

    Args:
        words: Iterable of strings to write, in order.
        outpath: Path of the file to (over)write.
    """
    with open(outpath, 'w', encoding='utf-8') as sink:
        sink.writelines(words)
def findfrecuency(txt_path, keywords, outpath):
    """Count how many lines of a text file mention each keyword.

    Matching rules:
      * A keyword made only of ASCII letters is matched case-insensitively
        and must not be directly preceded or followed by another ASCII
        letter (so "ai" does not match inside "pain").
      * Any other keyword (Chinese, digits, mixed) is matched as a plain,
        case-sensitive substring.

    A line adds at most one to a given keyword's count, however many times
    the keyword occurs in that line. Every line that matched at least one
    keyword is written to *outpath* once, in first-seen order, via
    ``writeintxt``.

    Args:
        txt_path: Path of the UTF-8 text file to scan.
        keywords: Iterable of keyword strings to look for.
        outpath: Path of the file that receives the matching lines.

    Returns:
        A dict mapping every keyword to the number of lines that contain
        it, or an empty dict if *txt_path* does not exist (a message is
        printed in that case and nothing is written).
    """
    if not os.path.exists(txt_path):
        print("文件不存在\n")
        return {}

    # The tally is created here instead of being read from a module-level
    # global, so the function works for any caller and any keyword list and
    # never accumulates counts across calls.
    keyword_count = {keyword: 0 for keyword in keywords}

    # Build each keyword's matcher once rather than once per line.
    # A matcher of None means "plain substring test".
    matchers = []
    for keyword in keywords:
        if re.match(r'^[a-zA-Z]+$', keyword):
            compiled = re.compile(
                r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])',
                re.IGNORECASE,
            )
            matchers.append((keyword, compiled))
        else:
            matchers.append((keyword, None))

    # A dict used as an ordered set: removes duplicate lines while keeping
    # the order in which they first appeared, so the output is stable.
    matched_lines = {}
    with open(txt_path, 'r', encoding='utf-8') as file:
        for line in file:
            for keyword, compiled in matchers:
                if compiled is not None:
                    found = compiled.search(line) is not None
                else:
                    found = keyword in line
                if found:
                    keyword_count[keyword] += 1
                    matched_lines[line] = None

    writeintxt(list(matched_lines), outpath)
    return keyword_count
def writeinexcel(keyword_count, savepath):
    """Save the keyword counts to an Excel workbook, highest count first.

    If *keyword_count* is an empty dict, a message is printed and no file
    is written. Otherwise a single-sheet workbook is created with a header
    row followed by one row per keyword whose count is greater than zero,
    and saved to *savepath*.

    Args:
        keyword_count: Mapping of keyword -> count.
        savepath: Path of the .xlsx file to write.
    """
    if keyword_count == {}:
        print("没有筛选到关键词\n")
        return

    # Fresh workbook; its default sheet becomes the report sheet.
    book = Workbook()
    sheet = book.active
    sheet.title = "AI应用弹幕统计"
    sheet.append(['AI应用', '出现数量'])

    # Rank by count, descending, and drop keywords that never appeared.
    ranked = sorted(keyword_count.items(), key=lambda pair: pair[1], reverse=True)
    rows = [[name, hits] for name, hits in ranked if hits > 0]
    for row in rows:
        sheet.append(row)

    book.save(savepath)
if __name__ == '__main__':
    # Input file: the complete danmaku (bullet-comment) data.
    txt_path = 'd:\\学习\\软件工程\\swork\\res\\total300_3.txt'

    # Output locations: the Excel ranking of keyword counts, and the text
    # file that collects the comments mentioning AI technology.
    savepath = "d:\\学习\\软件工程\\swork\\res\\AI_dm_rank4.xlsx"
    outpath = "d:\\学习\\软件工程\\swork\\res\\select_ai4.txt"

    # Keywords to look for in the comments.
    keywords = [
        'VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型',
        'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟',
        'ai音效', 'ai视频', 'AI修复',
    ]

    # Initialise a dictionary holding a zero count for every keyword.
    keyword_count = dict.fromkeys(keywords, 0)

    # Scan the comments to get the per-keyword counts, then write the
    # ranking to Excel.
    keyword_count = findfrecuency(txt_path, keywords, outpath)
    writeinexcel(keyword_count, savepath)