forked from p46318075/CodePattern
parent
856fdcc1e1
commit
0e55cabe5c
@ -0,0 +1,48 @@
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
# Matches every character that is neither a word character nor whitespace,
# plus the underscore: ``\w`` includes ``_``, so it has to be listed
# explicitly to be treated as punctuation.
_PUNCTUATION_RE = re.compile(r'[^\w\s]|_')


def clean_text(text: str) -> str:
    """Return *text* lowercased with all punctuation removed.

    Punctuation characters (including underscores) are deleted rather than
    replaced by a space, so ``"don't"`` becomes ``"dont"``. Whitespace is
    left untouched so the result can be split into words afterwards.
    """
    return _PUNCTUATION_RE.sub('', text).lower()
|
||||
|
||||
def count_frequencies(text: str) -> Counter:
    """Count how often each word occurs in *text*.

    The text is first normalized with ``clean_text`` and then split on
    whitespace; the resulting words are tallied in a ``Counter``.
    """
    words = clean_text(text).split()
    tally = Counter()
    tally.update(words)
    return tally
|
||||
|
||||
def interactive_mode():
    """Prompt for a file path and a count, then print the top-n words.

    The user is asked for a file path and for ``n``, the number of most
    common words to show. ``n`` must parse as an integer greater than zero;
    otherwise an input-error message is printed and the function returns.
    The file is read as UTF-8, its word frequencies are computed with
    ``count_frequencies``, and each of the ``n`` most common words is
    printed as ``word: count``.

    Missing files, other I/O failures and text encoding/decoding failures
    are reported with a message instead of a traceback. Nothing is returned.
    """
    # Strip surrounding whitespace: a stray space or newline typed or pasted
    # with the path would otherwise produce a spurious "file not found".
    file_path = input("请输入文件路径: ").strip()

    try:
        n = int(input("请输入你想要输出的前n个最常见单词的数量: "))
        if n <= 0:
            raise ValueError("数量必须大于0。")
    except ValueError as e:
        # Covers both a non-integer entry (raised by int()) and n <= 0.
        print(f"输入错误:{e}")
        return

    try:
        # Read the whole file as UTF-8 text.
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Count the words and print the n most common ones.
        for word, freq in count_frequencies(text).most_common(n):
            print(f"{word}: {freq}")
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
    except (OSError, ValueError) as e:
        # OSError: permission denied, path is a directory, broken pipe, ...
        # ValueError: UnicodeDecodeError for a non-UTF-8 file, or
        # UnicodeEncodeError when the console cannot print a word.
        # Anything else is a programming error and is left to propagate
        # rather than being hidden behind a generic message.
        print(f"发生错误: {e}")
|
||||
|
||||
def main() -> None:
    """Program entry point: greet the user, then start the interactive prompt."""
    banner = "欢迎使用词频统计工具。"
    print(banner)
    interactive_mode()
|
||||
|
||||
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in new issue