From 3c439ef8d7e1b3024f598995e4795023a33cac39 Mon Sep 17 00:00:00 2001
From: zj3D
Date: Sat, 16 Mar 2024 13:59:29 +0800
Subject: [PATCH] update

---
 交互/Agent/80.py                         | 10 +-
 .../{享元模式 => 共享对象}/tf-38.py      | 98 +++++++++----------
 .../函数缓存}/84.py                      |  0
 .../map-reduce}/tf_91.py                 | 42 +++++---
 计算设备/map-reduce/tf_92.py             | 21 ++--
 5 files changed, 90 insertions(+), 81 deletions(-)
 rename 基本结构/{享元模式 => 共享对象}/tf-38.py (97%)
 rename {计算设备/缓存 => 基本结构/函数缓存}/84.py (100%)
 rename {语言特性/异步 => 计算设备/map-reduce}/tf_91.py (50%)

diff --git a/交互/Agent/80.py b/交互/Agent/80.py
index ab6d22a..80ae836 100644
--- a/交互/Agent/80.py
+++ b/交互/Agent/80.py
@@ -27,12 +27,7 @@ def compute_all_word_frequencies(agents):
         future_to_agent = {executor.submit(agent.compute_word_frequency): agent for agent in agents}
         for future in concurrent.futures.as_completed(future_to_agent):
             agent = future_to_agent[future]
-            try:
-                # Get the computed result; exceptions are not handled further
-                data = future.result()
-            except Exception as exc:
-                print(f'Error generating word frequencies for {agent.text_chunk[:10]}...: {exc}')
-            # The word frequencies are already stored in the agent
+            data = future.result()   # the word frequencies are stored in the agent


 # After all agents have finished, merge their word-frequency results
@@ -48,5 +43,4 @@ if __name__ == '__main__':
     agents = create_agents(words)    # create the agents
     compute_all_word_frequencies(agents)  # compute
     merged_word_freq = merge_word_frequencies(agents)  # merge the results
-    for (w, c) in merged_word_freq.most_common(10):    # print in sorted order
-        print(w, '-', c)
\ No newline at end of file
+    util.print_word_freqs(merged_word_freq.most_common(10))   # print in sorted order
\ No newline at end of file
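For readers skimming the patch: the hunk above collapses the per-future try/except, so any exception raised inside compute_word_frequency now propagates out of future.result() instead of being printed and swallowed per agent. The following minimal, self-contained sketch of this fan-out/fan-in pattern is not part of the patch; the Agent class, the sample chunks, and merge_word_frequencies are hypothetical stand-ins for the repo's own helpers, and only the standard library is assumed.

import concurrent.futures
from collections import Counter

class Agent:
    # Hypothetical stand-in for the repo's Agent: it counts words in its own chunk.
    def __init__(self, text_chunk):
        self.text_chunk = text_chunk
        self.word_freq = Counter()

    def compute_word_frequency(self):
        self.word_freq = Counter(self.text_chunk.lower().split())

def compute_all_word_frequencies(agents):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_agent = {executor.submit(a.compute_word_frequency): a for a in agents}
        for future in concurrent.futures.as_completed(future_to_agent):
            future.result()  # re-raises any worker exception; results live on the agents

def merge_word_frequencies(agents):
    total = Counter()
    for agent in agents:
        total += agent.word_freq
    return total

if __name__ == '__main__':
    chunks = ['the quick brown fox', 'the lazy dog', 'the quick dog']
    agents = [Agent(chunk) for chunk in chunks]
    compute_all_word_frequencies(agents)
    print(merge_word_frequencies(agents).most_common(3))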
diff --git a/基本结构/享元模式/tf-38.py b/基本结构/共享对象/tf-38.py
similarity index 97%
rename from 基本结构/享元模式/tf-38.py
rename to 基本结构/共享对象/tf-38.py
index 79b8c7d..359d4b7 100644
--- a/基本结构/享元模式/tf-38.py
+++ b/基本结构/共享对象/tf-38.py
@@ -1,50 +1,50 @@
-'''
-In the Flyweight pattern, objects are designed to be shareable and used by many contexts, instead of creating a new object in every context.
-If we have many different word-frequency analysis needs (sometimes the top 10 words, sometimes the top 20, sometimes with a limit on word length), we would need several word-frequency counters, each created independently and storing its own internal state. In that case the Flyweight pattern shares counter objects of the same type: a single shared instance is created and then personalized per use through its parameters, and sharing the same internal state lowers the cost of object creation and memory.
+For example, if I need the top ten words for 3 files and the top twenty words for another 3 files, I only need to create 2 counter objects, each storing the same internal state: one object
+returns the top ten words and the other the top twenty, instead of creating 6 objects.
+'''
+
+from cppy.cp_util import *
+
+
+# The flyweight interface
+class WordFrequencyController():
+    def print_word_freqs(self, number):
+        pass
+
+# The concrete flyweight
+class ConcreteWordFrequencyController(WordFrequencyController):
+    def __init__(self, controllertype, filepath):
+        self.word_list = extract_file_words(filepath)
+        self.word_freq = get_frequencies(self.word_list)
+        self.word_freq = sort_dict(self.word_freq)
+    def print_word_freqs(self, number):
+        print_word_freqs(self.word_freq, number)
+
+# The flyweight factory
+class WordFrequencyControllerFactory():
+    def __init__(self):
+        self.types = {}
+
+    def get_WordFrequencyController(self, controller_type, testfilepath):
+        if controller_type not in self.types:
+            # Create a new flyweight object
+            self.types[controller_type] = ConcreteWordFrequencyController(controller_type, testfilepath)
+        return self.types[controller_type]  # reuse the existing flyweight object
+
+def process_command(factory: WordFrequencyControllerFactory, number: str):
+    controller_type = number
+    controller = factory.get_WordFrequencyController(controller_type, testfilepath)
+    controller.print_word_freqs(int(number))
+
+
+if __name__ == "__main__":
+    factory = WordFrequencyControllerFactory()
+    while True:
+        try:
+            number = input("How many of the most frequent words should be shown? ")
+            process_command(factory, number)
+        except EOFError:
+            break
\ No newline at end of file
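The docstring above explains Flyweight in prose; the following minimal sketch, not part of the patch, shows the same factory-plus-shared-instance mechanics in isolation. The inline word list replaces the repo's cppy.cp_util helpers (extract_file_words and friends), and the simplified class and method names are assumptions for illustration.

from collections import Counter

class WordFrequencyController:
    def __init__(self, words):
        # Intrinsic, shared state: the word counts, computed once.
        self.word_freq = Counter(words)

    def print_word_freqs(self, number):
        # Extrinsic state: how many entries to show is passed in per call.
        for word, count in self.word_freq.most_common(number):
            print(word, '-', count)

class WordFrequencyControllerFactory:
    def __init__(self):
        self.controllers = {}

    def get_controller(self, key, words):
        # Reuse the existing flyweight if one was already built for this key.
        if key not in self.controllers:
            self.controllers[key] = WordFrequencyController(words)
        return self.controllers[key]

if __name__ == '__main__':
    words = 'the quick brown fox jumps over the lazy dog the fox'.split()
    factory = WordFrequencyControllerFactory()
    a = factory.get_controller('demo', words)
    b = factory.get_controller('demo', words)
    assert a is b          # the same shared instance serves both requests
    a.print_word_freqs(2)  # top 2 entries
    b.print_word_freqs(3)  # top 3 entries, same object, different parameter

The assert captures the property the diff's factory also relies on: asking again with the same key returns the very same object, while the number of entries to display stays extrinsic, passed in per call.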
diff --git a/计算设备/缓存/84.py b/基本结构/函数缓存/84.py
similarity index 100%
rename from 计算设备/缓存/84.py
rename to 基本结构/函数缓存/84.py
diff --git a/语言特性/异步/tf_91.py b/计算设备/map-reduce/tf_91.py
similarity index 50%
rename from 语言特性/异步/tf_91.py
rename to 计算设备/map-reduce/tf_91.py
index f0a0475..df5add8 100644
--- a/语言特性/异步/tf_91.py
+++ b/计算设备/map-reduce/tf_91.py
@@ -2,16 +2,23 @@ import threading
 from collections import Counter
 from cppy.cp_util import *

-stop_words = get_stopwords()
-
-# A function that computes the word frequencies for one thread
-def count_words(start, end, text, result_index, results):
-    words = re_split( text[start:end] )
-    words = [w for w in words if not w in stop_words]
-    result = Counter(words)
-    results[result_index] = result
-
-if __name__ == '__main__':
+#
+# Multi-threaded
+#
+def process_chunk(start, end, text, result_index, results):
+    # Tokenize and filter out stop words
+    words = extract_str_words( text[start:end] )
+    results[result_index] = Counter(words)
+
+def merge_counts(counts_list):
+    # Merge multiple Counter objects
+    total_counts = Counter()
+    for counts in counts_list:
+        total_counts += counts
+    return total_counts
+
+@timing_decorator
+def main():
     # Read the file contents
     text = read_file(testfilepath)

@@ -29,16 +36,19 @@
         start = i * chunk_size
         # Make sure the last thread reads to the end of the file
         end = text_length if i == num_threads - 1 else (i + 1) * chunk_size
-        t = threading.Thread(target=count_words, args=(start, end, text, i, results))
+        t = threading.Thread(target=process_chunk, args=(start, end, text, i, results))
         threads.append(t)
         t.start()

     # Wait for all threads to finish
     for t in threads:
         t.join()

-    # Merge the results
-    total_count = Counter()
-    for result in results: total_count += result
+    # Merge the counts
+    total_counts = merge_counts(results)

-    # Print the 10 most frequent words
-    print_word_freqs( total_count.most_common(10) )
\ No newline at end of file
+    # Print the top n most frequent words
+    print_word_freqs( total_counts.most_common(10) )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/计算设备/map-reduce/tf_92.py b/计算设备/map-reduce/tf_92.py
index e9a63a8..525181e 100644
--- a/计算设备/map-reduce/tf_92.py
+++ b/计算设备/map-reduce/tf_92.py
@@ -1,10 +1,10 @@
-import re
 import multiprocessing
 from collections import Counter
 from cppy.cp_util import *

-stopwords = get_stopwords()
-
+#
+# Multi-process
+#
 def process_chunk(chunk):
     # Tokenize and filter out stop words
     words = extract_str_words( chunk.lower() )
@@ -16,8 +16,9 @@ def merge_counts(counts_list):
     for counts in counts_list:
         total_counts += counts
     return total_counts
-
-if __name__ == '__main__':
+
+@timing_decorator
+def main():
     # Read the file contents
     content = read_file(testfilepath)

@@ -34,6 +35,10 @@
     # Merge the counts
     total_counts = merge_counts(counts_list)

-    # Print the top n most frequent words
-    for word, count in total_counts.most_common(10):
-        print(f"{word}-- {count}")
\ No newline at end of file
+    # Print the top n most frequent words
+    print_word_freqs( total_counts.most_common(10) )
+
+
+if __name__ == '__main__':
+    main()
+
\ No newline at end of file
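Since the tf_92.py hunks show only fragments, here is a minimal end-to-end sketch of the same map-reduce shape, assuming nothing beyond the standard library: split the input into chunks, map process_chunk across a process pool, then reduce with merge_counts. The inline sample text and the naive whitespace tokenizer stand in for the repo's read_file, testfilepath, and extract_str_words helpers, which are not shown in the diff.

import multiprocessing
from collections import Counter

def process_chunk(chunk):
    # Map step: count the words in one chunk (naive whitespace tokenizer).
    return Counter(chunk.lower().split())

def merge_counts(counts_list):
    # Reduce step: merge the per-chunk Counters into one.
    total_counts = Counter()
    for counts in counts_list:
        total_counts += counts
    return total_counts

def main():
    content = 'the quick brown fox jumps over the lazy dog ' * 1000
    num_chunks = 4
    chunk_size = len(content) // num_chunks
    chunks = []
    for i in range(num_chunks):
        start = i * chunk_size
        # Make sure the last chunk reaches the end of the text
        end = len(content) if i == num_chunks - 1 else (i + 1) * chunk_size
        chunks.append(content[start:end])

    with multiprocessing.Pool(processes=num_chunks) as pool:
        counts_list = pool.map(process_chunk, chunks)

    total_counts = merge_counts(counts_list)
    print(total_counts.most_common(10))

if __name__ == '__main__':
    main()

One caveat this sketch shares with the threaded tf_91.py: slicing by character offset can split a word at a chunk boundary, a simplification the repo's versions also appear to accept.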