From a0b2971f40ed8fac577848354b553f531dfff712 Mon Sep 17 00:00:00 2001 From: pioc37juv <1245880206@qq.com> Date: Wed, 18 Sep 2024 19:49:55 +0800 Subject: [PATCH] =?UTF-8?q?Delete=20'=E6=95=B0=E6=8D=AE=E5=88=86=E6=9E=90.?= =?UTF-8?q?py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 数据分析.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 数据分析.py diff --git a/数据分析.py b/数据分析.py deleted file mode 100644 index 643ed36..0000000 --- a/数据分析.py +++ /dev/null @@ -1,42 +0,0 @@ -import pandas as pd -import jieba -from collections import Counter - - -def read_excel_and_count_words(file_path, sheet_name='Sheet1', column_name='内容'): - """读取指定的 Excel 文件并进行词频统计""" - # 1. 读取 Excel 文件的指定表格和列 - df = pd.read_excel(file_path, sheet_name=sheet_name) - - if column_name not in df.columns: - print(f"列名 '{column_name}' 在 Excel 文件中未找到。") - return - - # 2. 获取指定列的文本内容 - report = ' '.join(df[column_name].astype(str).tolist()) # 将所有行合并为一个字符串 - - # 3. 进行分词 - words = jieba.cut(report) - - # 4. 按指定长度提取词 - report_words = [word for word in words if len(word) >= 3] - - # 5. 统计高频词汇 - result = Counter(report_words).most_common(50) - - # 6. 输出结果 - print("高频词汇统计结果:") - for word, count in result: - print(f"{word}: {count}") - - # 7. 保存高频词及其频率至高频词.xlsx - result_df = pd.DataFrame(result, columns=['词汇', '频率']) # 创建 DataFrame - result_df.to_excel('高频词.xlsx', index=False, sheet_name='高频词汇') # 保存到 Excel 文件 - - -if __name__ == '__main__': - file_path = '合并弹幕.xlsx' # 替换为你的 Excel 文件路径 - sheet_name = 'MergedData' # 替换为你需要的工作表名称 - column_name = '弹幕文本' # 替换为你的文本内容所在的列名 - - read_excel_and_count_words(file_path, sheet_name, column_name)