print

1 year ago · 0e55cabe5c
parent 856fdcc1e1
commit 0e55cabe5c
8 changed files with 58 additions and 10 deletions
--- a/一盘大棋/A01.py
+++ b/一盘大棋/A01.py
@@ -1,5 +1,5 @@
 import string
-from cppy.cp_util import *
+from cppy.cp_util import stopwordfilepath,testfilepath

 # 准备词和停用词表
 word_freqs = []
--- a/一盘大棋/A02.py
+++ b/一盘大棋/A02.py
@@ -1,4 +1,5 @@
-from cppy.cp_util import *
+from cppy.cp_util import stopwordfilepath,testfilepath
+import string
 from collections import Counter

 # 准备词和停用词表
--- a/一盘大棋/A03.py
+++ b/一盘大棋/A03.py
@@ -1,11 +1,10 @@
-import re, sys, collections
-from cppy.cp_util import *
+import re, collections
+from cppy.cp_util import stopwordfilepath,testfilepath

 stopwords = set(open( stopwordfilepath,encoding = 'utf8' ).read().split(','))
 words = re.findall('[a-z]{2,}', open( testfilepath,encoding = 'utf8').read().lower())
 counts = collections.Counter(w for w in words if w not in stopwords)
-for (w, c) in counts.most_common(10):
-    print(w, '-', c)
+for (w, c) in counts.most_common(10) :  print(w, '-', c)

 '''
 熟练的软件工程师，会如此简单完成任务
--- a/交互/终端/终端命令行/command_line.py
+++ b/交互/终端/终端命令行/command_line.py
@@ -0,0 +1,48 @@
+import re
+from collections import Counter
+
+# Clean the text: remove punctuation and convert to lowercase
+def clean_text(text):
+    """Return ``text`` lowercased with punctuation removed.
+
+    Every character that is neither a word character nor whitespace
+    (as defined by the regex classes in the pattern below) is deleted;
+    the remaining text is then lowercased.
+    """
+    without_punctuation = re.sub(r'[^\w\s]', '', text)
+    return without_punctuation.lower()
+
+# Count word frequencies
+def count_frequencies(text):
+    """Return a ``Counter`` mapping each word in ``text`` to its count.
+
+    The text is first normalized with ``clean_text`` and then split on
+    whitespace; each resulting token is counted as one word.
+    """
+    words = clean_text(text).split()
+    return Counter(words)
+
+# Interactively prompt the user for a file path and the number of top words
+def interactive_mode():
+    """Prompt for a file and a count, then print the most common words.
+
+    Reads a file path and a positive integer ``n`` from standard input.
+    If the count is not a positive integer, an error message is printed
+    and the function returns without touching the file. Otherwise the
+    file is read as UTF-8 and the ``n`` most frequent words are printed,
+    one ``word: count`` pair per line. A missing file or any other
+    failure is reported with a printed message rather than raised.
+    """
+    file_path = input("请输入文件路径: ")
+    try:
+        top_n = int(input("请输入你想要输出的前n个最常见单词的数量: "))
+        # Reuse the ValueError path so bad text and bad ranges are reported alike.
+        if top_n < 1:
+            raise ValueError("数量必须大于0。")
+    except ValueError as e:
+        print(f"输入错误：{e}")
+        return
+
+    try:
+        # Read the file and rank its words by frequency.
+        with open(file_path, 'r', encoding='utf-8') as handle:
+            ranking = count_frequencies(handle.read()).most_common(top_n)
+
+        # Print the results.
+        for word, freq in ranking:
+            print(f"{word}: {freq}")
+    except FileNotFoundError:
+        print(f"文件未找到: {file_path}")
+    except Exception as e:
+        # Interactive boundary: report any other failure instead of crashing.
+        print(f"发生错误: {e}")
+
+# Main entry point
+def main():
+    """Print the welcome banner and start the interactive session."""
+    print("欢迎使用词频统计工具。")
+    interactive_mode()
+
+# Run the tool only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/对象化/A22.py
+++ b/对象化/A22.py
@@ -1,7 +1,7 @@
 from cppy.cp_util import *

 def extract_words(obj, path_to_file):    
-    obj['data'] = re_split( read_file(path_to_file) )
+    obj['data'] = extract_file_words(path_to_file)

 def load_stop_words(obj):    
    obj['stop_words'] = get_stopwords()
--- a/工程化/对象接口/tf-14A.py
+++ b/工程化/对象接口/tf-14A.py
--- a/工程化/对象接口/tf-14B.py
+++ b/工程化/对象接口/tf-14B.py
--- a/计算设备/缓存/84.py
+++ b/计算设备/缓存/84.py
@@ -23,9 +23,9 @@ top_10_words = calculate_word_frequency(testfilepath)
 print_word_freqs(top_10_words)

 '''
-python 提供了一种缓存调用函数的机制
-import functools  
-  
+Python 提供了一个缓存调用函数的装饰器
+import functools
+
 # 使用 functools.lru_cache 缓存结果  
 @functools.lru_cache(maxsize=None)  
 def calculate_word_frequency(file_path):