Delete '数据分析.py'

11 months ago · a0b2971f40
parent 1d86f92d6c
commit a0b2971f40
1 changed file with 0 additions and 42 deletions
--- a/数据分析.py
+++ b/数据分析.py
@@ -1,42 +0,0 @@
-import pandas as pd
-import jieba
-from collections import Counter
-
-
-def read_excel_and_count_words(file_path, sheet_name='Sheet1', column_name='内容',
-                               min_length=3, top_n=50, output_path='高频词.xlsx'):
-    """Count the most frequent words in one text column of an Excel sheet.
-
-    The non-empty cells of the column are joined into a single string,
-    segmented with ``jieba.cut``, filtered by word length and counted.  The
-    most common words are printed and saved to an Excel workbook.
-
-    Args:
-        file_path: Path of the Excel workbook to read.
-        sheet_name: Worksheet to load from the workbook.
-        column_name: Column that holds the text to analyse.
-        min_length: Minimum number of characters a word needs to be counted.
-        top_n: How many of the most frequent words to report.
-        output_path: Path of the workbook the result is written to.
-
-    Returns:
-        None.  When ``column_name`` is not a column of the sheet, a message
-        is printed and no output file is written.
-    """
-    df = pd.read_excel(file_path, sheet_name=sheet_name)
-
-    if column_name not in df.columns:
-        print(f"列名 '{column_name}' 在 Excel 文件中未找到。")
-        return
-
-    # Drop empty cells before converting to text: astype(str) would turn a
-    # missing value into the literal string "nan", which is long enough to
-    # pass the default length filter and would show up as a frequent "word".
-    texts = df[column_name].dropna().astype(str)
-
-    # Merge every row into one string so the text is segmented in one pass.
-    report = ' '.join(texts)
-
-    # Segment the text and count only the words that are long enough.
-    word_counts = Counter(
-        word for word in jieba.cut(report) if len(word) >= min_length
-    )
-    result = word_counts.most_common(top_n)
-
-    # Show the ranking on the console.
-    print("高频词汇统计结果：")
-    for word, count in result:
-        print(f"{word}: {count}")
-
-    # Persist the ranking (word, frequency) as a single-sheet workbook.
-    result_df = pd.DataFrame(result, columns=['词汇', '频率'])
-    result_df.to_excel(output_path, index=False, sheet_name='高频词汇')
-
-
-if __name__ == '__main__':
-    # Script entry point. Replace the three values below with your own
-    # workbook path, worksheet name and the column that holds the text.
-    read_excel_and_count_words(
-        file_path='合并弹幕.xlsx',
-        sheet_name='MergedData',
-        column_name='弹幕文本',
-    )