|
|
import os
|
|
|
import re
|
|
|
import os
|
|
|
import re
|
|
|
from openpyxl import Workbook
|
|
|
|
|
|
|
|
|
def writeintxt(words, outpath):
    """Write every string in ``words`` to ``outpath``, overwriting the file.

    The strings are written back to back exactly as given; no separator or
    newline is added, so callers must supply their own line endings.

    Args:
        words: Iterable of strings to write.
        outpath: Path of the UTF-8 text file to create or overwrite.
    """
    with open(outpath, 'w', encoding='utf-8') as sink:
        sink.writelines(words)
|
|
|
|
|
|
|
|
|
def findfrecuency(txt_path, keywords, outpath):
    """Count how many lines of a text file mention each keyword.

    Every line of ``txt_path`` is checked against every entry of ``keywords``:

    * A keyword made only of ASCII letters is matched case-insensitively and
      must not be directly preceded or followed by another ASCII letter, so
      ``ai`` matches ``AI绘画`` but not ``said``.
    * Any other keyword (Chinese or mixed text such as ``3D``) is matched as
      a plain, case-sensitive substring.

    A keyword is counted at most once per line. Every line that matched at
    least one keyword is written exactly once to ``outpath`` via
    ``writeintxt``, in the order the lines appear in the input file.

    Args:
        txt_path: Path of the UTF-8 text file to scan.
        keywords: Iterable of keyword strings to look for.
        outpath: Path of the text file that receives the matching lines.

    Returns:
        A dict mapping each keyword to the number of lines it matched, or an
        empty dict when ``txt_path`` does not exist (nothing is written then).
    """
    if not os.path.exists(txt_path):
        print("文件不存在\n")
        return {}

    # Materialise once: the keywords are iterated several times below.
    keywords = list(keywords)

    # Local counter. Relying on a module-level ``keyword_count`` made the
    # function raise NameError whenever it was called from another module.
    keyword_count = {keyword: 0 for keyword in keywords}

    # Build each matcher once instead of once per (line, keyword) pair.
    # ``None`` marks keywords that use the plain substring test.
    matchers = []
    for keyword in keywords:
        if re.match(r'^[a-zA-Z]+$', keyword):
            compiled = re.compile(
                r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])',
                re.IGNORECASE,
            )
            matchers.append((keyword, compiled))
        else:
            matchers.append((keyword, None))

    # Dict keys act as an insertion-ordered set: duplicates are dropped while
    # the file order is kept, so the output is deterministic and a final line
    # without a trailing newline stays last instead of being glued to another.
    matched_lines = {}
    with open(txt_path, 'r', encoding='utf-8') as file:
        for line in file:
            for keyword, compiled in matchers:
                if compiled is not None:
                    found = compiled.search(line) is not None
                else:
                    found = keyword in line
                if found:
                    keyword_count[keyword] += 1
                    matched_lines[line] = None

    writeintxt(list(matched_lines), outpath)
    return keyword_count
|
|
|
|
|
|
def writeinexcel(keyword_count, savepath):
    """Save the keyword statistics to an Excel workbook, highest count first.

    The sheet is titled "AI应用弹幕统计" and starts with a header row; each
    following row holds one keyword and its count. Keywords whose count is
    not greater than zero are left out. When ``keyword_count`` is an empty
    dict, a notice is printed and no file is written.

    Args:
        keyword_count: Mapping of keyword -> count.
        savepath: Destination path of the workbook file.
    """
    # Nothing to report: tell the user and skip creating the workbook.
    if keyword_count == {}:
        print("没有筛选到关键词\n")
        return

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "AI应用弹幕统计"
    sheet.append(['AI应用', '出现数量'])

    # Rank by count, largest first.
    ranking = sorted(keyword_count.items(), key=lambda pair: pair[1], reverse=True)
    for name, hits in ranking:
        if hits > 0:
            sheet.append([name, hits])

    workbook.save(savepath)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Keywords (AI-related applications and terms) to look for.
    keywords = ['VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型', 'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟','ai音效','ai视频','AI修复']

    # Input: the complete set of collected bullet comments (danmaku).
    txt_path = 'd:\\学习\\软件工程\\swork\\res\\total300_3.txt'

    # Output: text file holding the comments that mention an AI keyword.
    outpath = "d:\\学习\\软件工程\\swork\\res\\select_ai4.txt"

    # Output: Excel file holding the keyword ranking and counts.
    savepath = "d:\\学习\\软件工程\\swork\\res\\AI_dm_rank4.xlsx"

    # Zero-initialised counter with one entry per keyword.
    keyword_count = dict.fromkeys(keywords, 0)

    # Scan the comments to obtain the per-keyword counts ...
    keyword_count = findfrecuency(txt_path, keywords, outpath)

    # ... then write the ranking to the Excel file.
    writeinexcel(keyword_count, savepath)