parent
							
								
									1bc34bff47
								
							
						
					
					
						commit
						685ee40a1e
					
				| @ -0,0 +1,35 @@ | ||||
| import pandas as pd | ||||
| from openpyxl import Workbook | ||||
| import re | ||||
| 
 | ||||
def normalize_bullet_comment(commet):
    """Normalize laughter in a bullet comment so similar comments compare equal.

    Every run of two or more consecutive '哈' characters is collapsed to
    exactly '哈哈哈'. Only the laughter run is rewritten; the rest of the
    comment is preserved, so text such as an AI keyword next to the
    laughter is not thrown away.

    Args:
        commet: The bullet comment text. (The parameter name is a historical
            misspelling of "comment"; it is kept so keyword callers still work.)

    Returns:
        The comment with each laughter run normalized. Comments without a
        run of at least two '哈' are returned unchanged.
    """
    # Substitute in place instead of replacing the whole comment: a pure
    # laughter comment still becomes '哈哈哈', while mixed comments keep
    # their non-laughter content.
    return re.sub(r'哈{2,}', '哈哈哈', commet)
| 
 | ||||
def is_airelated(comment):
    """Return True when the comment contains an AI-related keyword.

    Matching is a plain substring test and is case-insensitive for the
    Latin keyword, so 'AI', 'ai', 'Ai' and 'aI' are all recognised.

    Args:
        comment: The bullet comment text to inspect.

    Returns:
        True if any keyword occurs anywhere in the comment, otherwise False.
    """
    # Keywords are stored lowercase; the comment is lowercased once so the
    # mixed-case spellings of "AI" are not missed.
    ai_keywords = ('ai', '神经网络', '机器学习', '人工智能', '深度学习')
    lowered = comment.lower()
    # NOTE: substring matching means 'ai' inside a longer Latin word also counts.
    return any(keyword in lowered for keyword in ai_keywords)
| 
 | ||||
# Load the raw bullet comments; each line of the file is treated as one comment.
with open('弹幕.txt', 'r', encoding='utf-8') as file:
    bullet_comments = list(file)

# Trim surrounding whitespace (including the trailing newline) and normalize
# each comment.
bullet_comments = list(
    map(normalize_bullet_comment, map(str.strip, bullet_comments))
)

# Keep only the comments that mention an AI keyword, then count how many
# times each distinct comment text occurs.
ai_related_comments = list(filter(is_airelated, bullet_comments))
count_series = pd.Series(ai_related_comments).value_counts()
top_8 = count_series.head(8)

# Build a two-column table (comment text, occurrence count) and save it to Excel.
df = pd.DataFrame({'弹幕': top_8.index, '数量': top_8.values})
df.to_excel('aiTop8.xlsx', sheet_name='Top 8 Bullet Comments', index=False)
| 
 | ||||
| 
 | ||||
					Loading…
					
					
				
		Reference in new issue