|  |  | @ -1,48 +1,66 @@ | 
			
		
	
		
		
			
				
					
					|  |  |  | import os |  |  |  | import os | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | import re |  |  |  | import re | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | import os |  |  |  | import os | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | import re |  |  |  | import re | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | from openpyxl import Workbook |  |  |  | from openpyxl import Workbook | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | # 定义文件路径 |  |  |  | 
 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | test_path = 'd:\\学习\\软件工程\\swork\\res\\total300.txt' |  |  |  | def writeintxt (words,outpath): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | keywords = ['VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型', 'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟','ai音效','ai视频','AI修复'] |  |  |  |     with open(outpath, 'w', encoding='utf-8') as output_file: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |         for line in words: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | # 初始化一个字典来存储每个关键词的计数 |  |  |  |             output_file.write(line) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | keyword_count = {keyword: 0 for keyword in keywords} |  |  |  | 
 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | # 检查文件是否存在 |  |  |  | def findfrecuency(txt_path,keywords,outpath): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | if os.path.exists(test_path): |  |  |  |     if os.path.exists(txt_path): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 读取原始文本文件 |  |  |  |         # 读取原始文本文件,按行读入lines | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     with open(test_path, 'r', encoding='utf-8') as file: |  |  |  |         resultword = [] | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         lines = file.readlines() |  |  |  |         with open(txt_path, 'r', encoding='utf-8') as file: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |             lines = file.readlines() | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 检查每一行是否包含关键词,并统计数量 |  |  |  |         for line in lines: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     for line in lines: |  |  |  |             for keyword in keywords: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         for keyword in keywords: |  |  |  |                 # 对于英文关键词,确保前后不是英文字符 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             # 对于英文关键词,确保前后不是英文字符 |  |  |  |                 if re.match(r'^[a-zA-Z]+$', keyword): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             if re.match(r'^[a-zA-Z]+$', keyword): |  |  |  |                     pattern = r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])' | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                 pattern = r'(?<![a-zA-Z])' + re.escape(keyword) + r'(?![a-zA-Z])' |  |  |  |                     if re.search(pattern, line, re.IGNORECASE): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                 if re.search(pattern, line, re.IGNORECASE): |  |  |  |                         keyword_count[keyword] += 1 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                     keyword_count[keyword] += 1 |  |  |  |                         resultword.append(line) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             # 对于中文关键词,直接寻找 |  |  |  |                 # 对于中文关键词,直接寻找 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             elif keyword in line: |  |  |  |                 elif keyword in line: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                 keyword_count[keyword] += 1 |  |  |  |                     keyword_count[keyword] += 1 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |                     resultword.append(line) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 创建一个新的 Excel 工作簿 |  |  |  |         resultword1 = list(set(resultword)) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     wb = Workbook() |  |  |  |         writeintxt(resultword1,outpath) #去重 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     ws = wb.active |  |  |  |         return keyword_count#返回字典 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     ws.title = "AI应用弹幕统计" |  |  |  |     else: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     savepath = "d:\\学习\\软件工程\\swork\\res\\AI应用弹幕统计结果.xlsx" |  |  |  |         print("文件不存在\n") | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 添加表头 |  |  |  |         return {} | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     ws.append(['AI应用', '出现数量']) |  |  |  |      | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | def writeinexcel (keyword_count,savepath): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 将关键词和计数写入 Excel 工作表 |  |  |  |     if keyword_count == {}: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     for keyword, count in sorted(keyword_count.items(), key=lambda item: item[1], reverse=True): |  |  |  |         print("没有筛选到关键词\n") | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         if count > 0: |  |  |  |     else: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             ws.append([keyword, count]) |  |  |  |         # 创建一个新的 Excel 工作簿 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |         wb = Workbook() | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     # 保存 Excel 文件 |  |  |  |         ws = wb.active | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     wb.save(savepath) |  |  |  |         ws.title = "AI应用弹幕统计" | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | else: |  |  |  |         ws.append(['AI应用', '出现数量']) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     print("文件不存在\n") |  |  |  |         # 将关键词和计数写入 Excel 工作表 | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         for keyword, count in sorted(keyword_count.items(), key=lambda item: item[1], reverse=True): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             if count > 0: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 ws.append([keyword, count]) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         # 保存 Excel 文件 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         wb.save(savepath) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | if __name__ =='__main__': | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     # 定义文件路径 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     txt_path = 'd:\\学习\\软件工程\\swork\\res\\total300_3.txt'#总弹幕数据 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     keywords = ['VR', 'ai', '元宇宙', '超高清', '3D', 'gpt', '建模', '大模型', 'nlp', 'cv', 'openai', '智慧体育', '计算机', '虚拟','ai音效','ai视频','AI修复'] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     # 初始化一个字典来存储每个关键词的计数 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     keyword_count = {keyword: 0 for keyword in keywords} | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     #定义保存位置 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     savepath = "d:\\学习\\软件工程\\swork\\res\\AI_dm_rank4.xlsx"#存放排名,个数excel | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     outpath = "d:\\学习\\软件工程\\swork\\res\\select_ai4.txt"#存放有含ai技术评论的评价txt | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     keyword_count = findfrecuency(txt_path,keywords,outpath) #得到排名字典 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     writeinexcel (keyword_count,savepath) #将排名写入excel |