forked from p46318075/CodePattern
				
			
							parent
							
								
									856fdcc1e1
								
							
						
					
					
						commit
						0e55cabe5c
					
				| @ -0,0 +1,48 @@ | ||||
| import re | ||||
| from collections import Counter | ||||
| 
 | ||||
def clean_text(text: str) -> str:
    """Strip punctuation from ``text`` and convert it to lowercase.

    Every character that is neither a word character (``\\w``: letters,
    digits, underscore) nor whitespace is deleted outright, not replaced
    by a space, so ``"don't"`` becomes ``"dont"``. Whitespace is left
    untouched so callers can still split the result into words.

    Args:
        text: The raw text to normalise.

    Returns:
        The text without punctuation, in lowercase.
    """
    # Punctuation is removed before lowercasing, exactly as the original
    # did; swapping the two steps can change the result for characters
    # whose lowercase form contains a combining mark.
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower()
| 
 | ||||
def count_frequencies(text: str) -> Counter:
    """Count how often each word occurs in ``text``.

    The text is first normalised with ``clean_text`` and then split on
    runs of whitespace; every resulting token counts as one word.

    Args:
        text: The raw text to analyse.

    Returns:
        A ``Counter`` mapping each normalised word to its occurrence count.
    """
    # Counter accepts any iterable, so the list from split() is passed
    # directly instead of being re-wrapped in a generator.
    return Counter(clean_text(text).split())
| 
 | ||||
def interactive_mode() -> None:
    """Prompt for a file path and a count, then print the top words.

    The user is asked for the path of a UTF-8 text file and for ``n``, the
    number of most common words to show. Each of the ``n`` most frequent
    words is printed as ``word: count``, one per line.

    Problems are reported on standard output rather than raised: an
    invalid or non-positive ``n`` ends the function before the file is
    touched, and a missing file or any other failure while reading,
    counting or printing is reported with a message.
    """
    file_path = input("请输入文件路径: ")
    try:
        n = int(input("请输入你想要输出的前n个最常见单词的数量: "))
        if n <= 0:
            # Raised here so that non-numeric and non-positive input are
            # both reported by the single handler below.
            raise ValueError("数量必须大于0。")
    except ValueError as e:
        print(f"输入错误:{e}")
        return

    try:
        # Read the whole file into memory.
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Count the words and keep only the n most frequent ones.
        frequencies = count_frequencies(text)
        most_common = frequencies.most_common(n)

        # Report the result, most frequent word first.
        for word, freq in most_common:
            print(f"{word}: {freq}")
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
    except Exception as e:
        # Deliberate catch-all at the interactive boundary: any other
        # failure (permissions, decoding, ...) is shown to the user
        # instead of ending the session with a traceback.
        print(f"发生错误: {e}")
| 
 | ||||
def main() -> None:
    """Entry point: print a welcome banner and start the interactive session."""
    print("欢迎使用词频统计工具。")
    interactive_mode()
| 
 | ||||
# Start the tool only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
					Loading…
					
					
				
		Reference in new issue