You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
2.7 KiB

2 months ago
import os
import re
import os
import re
from openpyxl import Workbook
def writeintxt(words, outpath):
    """Write every string in ``words`` to ``outpath``, back to back.

    The file is opened in write mode with UTF-8 encoding, so any existing
    content is replaced. No separator or newline is added between items;
    each item is written exactly as given.

    Args:
        words: Iterable of strings to write.
        outpath: Path of the file to create or overwrite.
    """
    with open(outpath, 'w', encoding='utf-8') as sink:
        sink.writelines(words)
def findfrecuency(txt_path, keywords, outpath):
    """Count, per keyword, the lines of a text file that mention it.

    Every line of ``txt_path`` is tested against every entry of ``keywords``.
    A keyword made up only of ASCII letters is matched case-insensitively and
    must not be directly preceded or followed by another ASCII letter, so
    ``ai`` does not match inside ``said``. Any other keyword (Chinese, digits,
    mixed text) is matched as a plain, case-sensitive substring. A line adds
    at most one to a keyword's count, however often the keyword occurs in it.

    Each line that matched at least one keyword is handed once to
    ``writeintxt`` for writing to ``outpath``, in order of first appearance.

    Args:
        txt_path: Path of the UTF-8 text file to scan.
        keywords: Iterable of keyword strings.
        outpath: Path of the file that receives the matching lines.

    Returns:
        A dict mapping each keyword to the number of lines that matched it,
        or an empty dict when ``txt_path`` does not exist (a message is
        printed and nothing is written in that case).
    """
    if not os.path.exists(txt_path):
        print("文件不存在\n")
        return {}

    # Materialise once so a one-shot iterable can be walked more than once.
    keywords = list(keywords)

    # The counter is local: the function must not depend on (or mutate) a
    # module-level dict that only exists when the file is run as a script.
    keyword_count = {keyword: 0 for keyword in keywords}

    # Build one matcher per keyword up front instead of per line.
    matchers = []
    for keyword in keywords:
        if re.match(r'^[a-zA-Z]+$', keyword):
            # English keyword: neighbours on both sides must not be letters.
            pattern = re.compile(
                r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])',
                re.IGNORECASE,
            )
            matchers.append((keyword, pattern))
        else:
            # Non-English keyword: plain substring test.
            matchers.append((keyword, None))

    # De-duplicate while keeping first-seen order so the output file is
    # the same on every run.
    seen = set()
    matched_lines = []
    with open(txt_path, 'r', encoding='utf-8') as file:
        for line in file:
            for keyword, pattern in matchers:
                if pattern is not None:
                    hit = pattern.search(line) is not None
                else:
                    hit = keyword in line
                if not hit:
                    continue
                keyword_count[keyword] += 1
                if line not in seen:
                    seen.add(line)
                    matched_lines.append(line)

    writeintxt(matched_lines, outpath)
    return keyword_count
def writeinexcel(keyword_count, savepath):
    """Save the keyword counts as a ranked table in an Excel workbook.

    When ``keyword_count`` equals an empty dict, a message is printed and no
    file is written. Otherwise a new workbook is created, its active sheet
    is titled and given a header row, and one row per keyword with a count
    above zero is appended, highest count first.

    Args:
        keyword_count: Dict mapping keyword -> count.
        savepath: Path the workbook is saved to.
    """
    if keyword_count == {}:
        print("没有筛选到关键词\n")
        return

    # Fresh workbook; the default active sheet holds the table.
    book = Workbook()
    sheet = book.active
    sheet.title = "AI应用弹幕统计"
    sheet.append(['AI应用', '出现数量'])

    # Rank by count, descending, then skip keywords that never appeared.
    ranked = sorted(keyword_count.items(), key=lambda pair: pair[1], reverse=True)
    for name, hits in ranked:
        if hits > 0:
            sheet.append([name, hits])

    book.save(savepath)
if __name__ == '__main__':
    # Terms to search for in the comment data.
    keywords = ['VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型', 'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟','ai音效','ai视频','AI修复']

    # Input: text file holding all collected comments.
    txt_path = 'd:\\学习\\软件工程\\swork\\res\\total300_3.txt'
    # Output: Excel file that receives the ranked counts.
    savepath = "d:\\学习\\软件工程\\swork\\res\\AI_dm_rank4.xlsx"
    # Output: text file that receives the comments containing a keyword.
    outpath = "d:\\学习\\软件工程\\swork\\res\\select_ai4.txt"

    # Start every keyword at a count of zero.
    keyword_count = dict.fromkeys(keywords, 0)

    # Count the keywords, then write the ranking to Excel.
    keyword_count = findfrecuency(txt_path, keywords, outpath)
    writeinexcel(keyword_count, savepath)