"""Count AI-related keywords in a danmaku (bullet-comment) text file.

For every keyword in a fixed list of AI-technology terms, count how many
lines of the input file mention it, print the most frequent keywords, and
write the matching lines to an Excel sheet with one column per keyword.
"""

import re
from collections import Counter

import pandas as pd

# Keywords (English and Chinese) that mark a danmaku as AI-related.
AI_KEYWORDS = [
    "AI", "人工智能", "Machine learning", "机器学习", "Deep learning", "深度学习",
    "Neural network", "神经网络", "自然语言处理", "Natural language processing",
    "计算机视觉", "Computer vision", "Robotics", "机器人", "自动化", "Automation",
    "人脸识别", "Face recognition", "大数据", "数据挖掘", "智能系统", "自动驾驶", "无人驾驶",
]


def _compile_keyword(keyword):
    """Return a case-insensitive pattern that finds *keyword* in a line."""
    pattern = re.escape(keyword)
    if keyword.isascii():
        # A Latin keyword must not match inside a longer word ("AI" in
        # "again" or "wait"), yet it may sit directly next to CJK text
        # ("AI技术"). \b treats CJK characters as word characters, so guard
        # with ASCII letters instead; a trailing plural "s" is still accepted.
        pattern = rf"(?<![A-Za-z]){pattern}s?(?![A-Za-z])"
    return re.compile(pattern, re.IGNORECASE)


def count_ai_keywords(file_path, output_excel, top_n=8):
    """Count AI keyword mentions per line and export the top keywords.

    Each line of ``file_path`` is treated as one danmaku. A line counts once
    for every keyword it mentions, so a single line can contribute to
    several keywords.

    Args:
        file_path: Path of the UTF-8 text file, one danmaku per line.
        output_excel: Path of the Excel file to write. Each of the top
            keywords becomes a column holding the danmakus that mention it.
        top_n: How many of the most frequent keywords to print and export.

    Returns:
        A ``Counter`` mapping every matched keyword to its line count.
    """
    patterns = {keyword: _compile_keyword(keyword) for keyword in AI_KEYWORDS}
    keyword_count = Counter()
    keyword_danmakus = {keyword: [] for keyword in AI_KEYWORDS}

    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            danmaku = line.strip()
            for keyword, pattern in patterns.items():
                if pattern.search(danmaku):
                    keyword_count[keyword] += 1
                    keyword_danmakus[keyword].append(danmaku)

    # Rank once and reuse the result for both the report and the export.
    top_keywords = keyword_count.most_common(top_n)

    print(f"AI 技术相关的前{top_n}条弹幕关键词统计:")
    for keyword, count in top_keywords:
        print(f"{keyword}: {count} 条弹幕")

    # The columns have different lengths; wrapping each list in a Series lets
    # pandas pad the shorter ones with NaN instead of raising.
    df = pd.DataFrame(
        {keyword: pd.Series(keyword_danmakus[keyword]) for keyword, _ in top_keywords}
    )
    df.to_excel(output_excel, index=False)

    print(f"弹幕已保存至 {output_excel}")
    return keyword_count


# Default input and output locations used when the file is run as a script.
file_path = "danmakus_2024_olympics.txt"
output_excel = "top_ai_danmakus.xlsx"

if __name__ == "__main__":
    count_ai_keywords(file_path, output_excel)