import re
from collections import Counter

def clean_text(text: str) -> str:
    """Return *text* lowercased with all punctuation removed.

    Every character that is not a letter, a digit or whitespace is deleted
    (not replaced by a space), so "don't" becomes "dont". Whitespace is
    left untouched so callers can still split the result into words.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized text.
    """
    # ``\w`` also matches "_", so the underscore must be listed explicitly;
    # otherwise markup such as ``_word_`` would survive and be counted as a
    # different word from ``word``.
    return re.sub(r'[^\w\s]|_', '', text).lower()

def count_frequencies(text):
    """Count how often each word occurs in *text*.

    The text is first normalized with ``clean_text`` and then split on runs
    of whitespace; every resulting token counts as one word.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
    """
    words = clean_text(text).split()
    return Counter(words)

def interactive_mode() -> None:
    """Prompt for a file path and a count, then print the top words.

    Asks the user for the path of a text file (read as UTF-8) and for ``n``,
    the number of most frequent words to show, then prints one
    ``word: count`` line per word, most frequent first.

    Problems are reported on standard output instead of being raised: an
    ``n`` that is not a positive integer, a missing file, or any other error
    while reading or counting ends the function after printing a message.
    """
    # Unlike int(), open() does not ignore surrounding whitespace, so a path
    # typed or pasted with a stray leading/trailing space would be reported
    # as missing. Strip it so both prompts tolerate the same input noise.
    file_path = input("请输入文件路径 >> ").strip()

    try:
        n = int(input("请输入你想要输出的前n个最常见单词的数量 >> "))
    except ValueError as e:
        print(f"输入错误：{e}")
        return
    if n <= 0:
        print("输入错误：数量必须大于0。")
        return

    try:
        # Read the whole file up front; the handle is closed by the context
        # manager before any counting or printing happens.
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()

        frequencies = count_frequencies(text)

        # most_common(n) yields (word, count) pairs ordered by descending count.
        for word, freq in frequencies.most_common(n):
            print(f"{word}: {freq}")
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
    except Exception as e:
        # Last-resort handler for this interactive entry point: report the
        # failure (permission denied, invalid UTF-8, ...) instead of showing
        # the user a traceback.
        print(f"发生错误: {e}")

def main():
    """Entry point: print the welcome banner, then start the prompts."""
    welcome = "欢迎使用词频统计工具。"
    print(welcome)
    interactive_mode()

# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()