import re
from collections import Counter

# Matches every character that is neither a word character nor whitespace.
# For str patterns ``\w`` is Unicode-aware, so non-ASCII letters, digits and
# the underscore are kept; only punctuation/symbols are stripped.
_PUNCTUATION_RE = re.compile(r'[^\w\s]')


def clean_text(text: str) -> str:
    """Return *text* with punctuation removed and letters lower-cased.

    Whitespace is left untouched so the result can still be split into
    words. Underscores count as word characters and are preserved.
    """
    return _PUNCTUATION_RE.sub('', text).lower()


def count_frequencies(text: str) -> Counter:
    """Return a ``Counter`` mapping each cleaned word in *text* to its count.

    The text is normalised with :func:`clean_text` and then split on runs
    of whitespace; empty input yields an empty ``Counter``.
    """
    return Counter(clean_text(text).split())


def interactive_mode() -> None:
    """Prompt for a file path and a count, then print the top-n words.

    Reads the file as UTF-8 and prints one ``word: frequency`` line per
    entry, most frequent first. Invalid counts, missing files and read or
    decode failures are reported on stdout and the function returns
    without raising.
    """
    file_path = input("请输入文件路径 >> ")

    try:
        n = int(input("请输入你想要输出的前n个最常见单词的数量 >> "))
        if n <= 0:
            raise ValueError("数量必须大于0。")
    except ValueError as e:
        # Covers both a non-integer entry (raised by int()) and n <= 0.
        print(f"输入错误:{e}")
        return

    # Keep the try body limited to the operation that can actually fail
    # for environmental reasons: opening and decoding the file.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
        return
    except (OSError, UnicodeDecodeError) as e:
        # Permission problems, directories, I/O errors, non-UTF-8 content.
        print(f"发生错误: {e}")
        return

    for word, freq in count_frequencies(text).most_common(n):
        print(f"{word}: {freq}")


def main() -> None:
    """Print the welcome banner and run the interactive prompt once."""
    print("欢迎使用词频统计工具。")
    interactive_mode()


if __name__ == "__main__":
    main()