print

2 years ago · 0e55cabe5c
parent 856fdcc1e1
commit 0e55cabe5c
8 changed files with 58 additions and 10 deletions
--- a/一盘大棋/A01.py
+++ b/一盘大棋/A01.py
@ -1,5 +1,5 @@
 import string
-from cppy.cp_util import *
+from cppy.cp_util import stopwordfilepath,testfilepath
 # 准备词和停用词表
 word_freqs = []
--- a/一盘大棋/A02.py
+++ b/一盘大棋/A02.py
@ -1,4 +1,5 @@
-from cppy.cp_util import *
+from cppy.cp_util import stopwordfilepath,testfilepath
 import string
 from collections import Counter
 # 准备词和停用词表
--- a/一盘大棋/A03.py
+++ b/一盘大棋/A03.py
@ -1,11 +1,10 @@
-import re, sys, collections
+import re, collections
-from cppy.cp_util import *
+from cppy.cp_util import stopwordfilepath,testfilepath
 stopwords = set(open( stopwordfilepath,encoding = 'utf8' ).read().split(','))
 words = re.findall('[a-z]{2,}', open( testfilepath,encoding = 'utf8').read().lower())
 counts = collections.Counter(w for w in words if w not in stopwords)
-for (w, c) in counts.most_common(10):
+for (w, c) in counts.most_common(10) :  print(w, '-', c)
-    print(w, '-', c)
 '''
 熟练的软件工程师，会如此简单完成任务
--- a/交互/终端/终端命令行/command_line.py
+++ b/交互/终端/终端命令行/command_line.py
@ -0,0 +1,48 @@
 import re
 from collections import Counter
 # Normalize text: strip punctuation, then lowercase
 def clean_text(text):
    """Return *text* with punctuation removed and letters lowercased.

    Every character that is neither a regex word character nor whitespace
    is deleted first; the remaining text is then lowercased.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower()
 # Count word frequencies
 def count_frequencies(text):
    """Return a Counter of the whitespace-separated words in the cleaned text.

    The text is passed through clean_text() before being split, so the
    counts are keyed by whatever tokens that cleaning step leaves behind.
    """
    tokens = clean_text(text).split()
    return Counter(tokens)
 # Interactively prompt for a file path and for how many top words to show
 def interactive_mode():
    """Ask for a file path and a count, then print the most common words.

    Prompts first for the file path and then for the number of words to
    report. A non-integer or non-positive count prints an input-error
    message and returns without touching the file. A missing file or any
    other failure while reading, counting or printing is reported on
    stdout instead of being raised.
    """
    file_path = input("请输入文件路径: ")
    try:
        top_n = int(input("请输入你想要输出的前n个最常见单词的数量: "))
        # Reuse the ValueError path so both bad inputs share one handler.
        if not top_n > 0:
            raise ValueError("数量必须大于0。")
    except ValueError as e:
        print(f"输入错误：{e}")
        return
    try:
        # Read the whole file as UTF-8 text.
        with open(file_path, encoding='utf-8') as handle:
            contents = handle.read()
        # Count the words and print the top_n most frequent, one per line.
        for word, freq in count_frequencies(contents).most_common(top_n):
            print(f"{word}: {freq}")
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
    except Exception as e:
        print(f"发生错误: {e}")
 # Entry point
 def main():
    """Print the welcome banner, then run the interactive prompt."""
    print("欢迎使用词频统计工具。")
    interactive_mode()
 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/对象化/A22.py
+++ b/对象化/A22.py
@ -1,7 +1,7 @@
 from cppy.cp_util import *
 def extract_words(obj, path_to_file):    
-    obj['data'] = re_split( read_file(path_to_file) )
+    obj['data'] = extract_file_words(path_to_file)
 def load_stop_words(obj):    
    obj['stop_words'] = get_stopwords()
--- a/工程化/对象接口/tf-14A.py
+++ b/工程化/对象接口/tf-14A.py
--- a/工程化/对象接口/tf-14B.py
+++ b/工程化/对象接口/tf-14B.py
--- a/计算设备/缓存/84.py
+++ b/计算设备/缓存/84.py
@ -23,9 +23,9 @@ top_10_words = calculate_word_frequency(testfilepath)
 print_word_freqs(top_10_words)
 '''
-python 提供了一种缓存调用函数的机制
+Python 提供了一个缓存调用函数的装饰器
-import functools  
+import functools
-  
+
 # 使用 functools.lru_cache 缓存结果  
@functools.lru_cache(maxsize=None)  
 def calculate_word_frequency(file_path):