From a3bc46dae30f9c61917f8c4b52e592d492fd189a Mon Sep 17 00:00:00 2001
From: zj3D <flysmart.ww@qq.com>
Date: Wed, 20 Mar 2024 08:51:30 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A4=A7=E4=BF=AE6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../尾调用/tf-10.py                        |  0
 .../尾调用/tf-25.py                        |  0
 .../尾调用/tf-26B.py                       |  0
 .../尾调用/模拟管道.py                 |  0
 12 语言特性/反射/反射_类.py          |  2 +-
 13 计算设备/数据分包/mapreduce.py     | 17 ++++---------
 13 计算设备/数据分包/多线程.py     | 20 +++++++--------
 13 计算设备/数据分包/多进程.py     | 14 ++++-------
 13 计算设备/数据分包/抽象并发.py  | 25 ++++++++++---------
 .../restful/{tf-35-app.py => app.py}          |  0
 .../restful/{tf-35-request.py => client.py}   |  0
 16 其它/对象设计模式/观察者.py     |  2 +-
 30 对象设计模式/结构型/170 桥接.py | 10 +++++---
 cppy_/cp_util.py                              |  7 ++++++
 14 files changed, 48 insertions(+), 49 deletions(-)
 rename {12 语言特性 => 11 概念认知}/尾调用/tf-10.py (100%)
 rename {12 语言特性 => 11 概念认知}/尾调用/tf-25.py (100%)
 rename {12 语言特性 => 11 概念认知}/尾调用/tf-26B.py (100%)
 rename {12 语言特性 => 11 概念认知}/尾调用/模拟管道.py (100%)
 rename 15 工程化/松耦合/restful/{tf-35-app.py => app.py} (100%)
 rename 15 工程化/松耦合/restful/{tf-35-request.py => client.py} (100%)

diff --git a/12 语言特性/尾调用/tf-10.py b/11 概念认知/尾调用/tf-10.py
similarity index 100%
rename from 12 语言特性/尾调用/tf-10.py
rename to 11 概念认知/尾调用/tf-10.py
diff --git a/12 语言特性/尾调用/tf-25.py b/11 概念认知/尾调用/tf-25.py
similarity index 100%
rename from 12 语言特性/尾调用/tf-25.py
rename to 11 概念认知/尾调用/tf-25.py
diff --git a/12 语言特性/尾调用/tf-26B.py b/11 概念认知/尾调用/tf-26B.py
similarity index 100%
rename from 12 语言特性/尾调用/tf-26B.py
rename to 11 概念认知/尾调用/tf-26B.py
diff --git a/12 语言特性/尾调用/模拟管道.py b/11 概念认知/尾调用/模拟管道.py
similarity index 100%
rename from 12 语言特性/尾调用/模拟管道.py
rename to 11 概念认知/尾调用/模拟管道.py
diff --git a/12 语言特性/反射/反射_类.py b/12 语言特性/反射/反射_类.py
index cca0d9e..81c93f4 100644
--- a/12 语言特性/反射/反射_类.py	
+++ b/12 语言特性/反射/反射_类.py	
@@ -27,7 +27,7 @@ def handle_task(task_type,*args):
         handler = handler_class() # 实例化处理器类        
         return handler.handle(*args) # 调用处理方法
     else:
-        print(f"No handler found for task type: {task_type}")        
+        print(f"No handler found for task type: {task_type}")       
 
 
 if __name__ == '__main__':   
diff --git a/13 计算设备/数据分包/mapreduce.py b/13 计算设备/数据分包/mapreduce.py
index 46f9288..ead5f8c 100644
--- a/13 计算设备/数据分包/mapreduce.py	
+++ b/13 计算设备/数据分包/mapreduce.py	
@@ -3,31 +3,26 @@ from collections import Counter
 from cppy.cp_util import *
 from functools import reduce
 
+stop_words = set(get_stopwords())  # set: O(1) membership tests in process_chunk
+
 # map - reduce
 def process_chunk(chunk):
     # 过滤停用词
-    stop_words = get_stopwords()
     words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
     return Counter(words)
 
-
 def merge_counts(count1,count2):
     sum_counts = count1 + count2
     return sum_counts
 
 
 @timing_decorator
-def main():
-    # 读取文件内容
-    content = re_split(read_file(testfilepath))
-
-    # 分割文件内容为多个块，每个块由一个进程处理
-    chunk_size = 1000  # 可以根据实际情况调整块大小
-    chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
+def main():   
+    # 读数据，按1000个词一组分片
+    chunks = get_chunks(testfilepath,1000)
 
     # 使用 map 方法和 process_chunk 函数处理每个分区
     counts_list = list(map(process_chunk, chunks))
-
     #  使用 reduce 和 merge_counts 函数统计所有分区的词频
     total_counts = (reduce(merge_counts,counts_list))
 
@@ -38,5 +33,3 @@ def main():
 if __name__ == '__main__':
     main()
 
-
-
diff --git a/13 计算设备/数据分包/多线程.py b/13 计算设备/数据分包/多线程.py
index cf15f81..a28f00f 100644
--- a/13 计算设备/数据分包/多线程.py	
+++ b/13 计算设备/数据分包/多线程.py	
@@ -7,9 +7,10 @@ from multiprocessing.pool import ThreadPool
 #
 # 多线程
 #
+stop_words = set(get_stopwords())  # set: O(1) membership tests in process_chunk
+
 def process_chunk(chunk):
-    # 过滤停用词
-    stop_words = get_stopwords()
+    # 过滤停用词    
     words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
     return Counter(words)
 
@@ -28,14 +29,13 @@ def thread_function(chunk, counts_list):
 
 
 @timing_decorator
-def main():
-    # 读取文件内容
-    content = re_split(read_file(testfilepath))
-    chunk_size = 1000 # 可以根据实际情况调整块大小
-    chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
-
-    # 使用多线程池，每个线程处理一个块
-    pool = ThreadPool(len(content)//chunk_size+1)
+def main():
+    # Read the data and split it into chunks of 1000 words
+    chunks = get_chunks(testfilepath,1000)
+
+    # One worker thread per chunk. ThreadPool(0) raises ValueError, so keep
+    # at least one worker when the file yields no chunks at all.
+    pool = ThreadPool(max(1, len(chunks)))
     counts_list = pool.map(process_chunk, chunks)
     pool.close()
     pool.join()
diff --git a/13 计算设备/数据分包/多进程.py b/13 计算设备/数据分包/多进程.py
index c9b7753..3df8890 100644
--- a/13 计算设备/数据分包/多进程.py	
+++ b/13 计算设备/数据分包/多进程.py	
@@ -7,13 +7,13 @@ from cppy.cp_util import *
 #
 # 多进程
 #
+stop_words = set(get_stopwords())  # set: O(1) membership tests in process_chunk
+
 def process_chunk(chunk):
-    # 过滤停用词
-    stop_words = get_stopwords()
+    # 过滤停用词    
     words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
     return Counter(words)
 
-
 def merge_counts(counts_list):
     # 合并多个Counter对象
     total_counts = Counter()
@@ -24,12 +24,8 @@ def merge_counts(counts_list):
 
 @timing_decorator
 def main():
-    # 读取文件内容
-    content = re_split(read_file(testfilepath))
-
-    # 分割文件内容为多个块，每个块由一个进程处理
-    chunk_size = 1000  # 可以根据实际情况调整块大小
-    chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
+    # 读取文件内容，分割文件内容为多个块，每个块由一个进程处理    
+    chunks = get_chunks(testfilepath,1000)
 
     # 使用多进程处理每个块
     pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
diff --git a/13 计算设备/数据分包/抽象并发.py b/13 计算设备/数据分包/抽象并发.py
index 585178a..7fe8533 100644
--- a/13 计算设备/数据分包/抽象并发.py	
+++ b/13 计算设备/数据分包/抽象并发.py	
@@ -7,24 +7,23 @@ concurrent.futures模块为Python中的并发编程提供了一个统一接口,
 这个模块隐藏了低层次的线程和进程创建、同步和清理的细节,提供了一个更高层次的API来处理并发任务。
 当前版本推荐它与asyncio模块结合使用完成Python中的各种异步编程任务。
 '''
+stop_words = set(util.get_stopwords())  # set: O(1) membership tests in compute_word_frequency
 
 class WordFrequencyAgent:  
     def __init__(self, words):          
         self.words = words  
   
-    def compute_word_frequency(self):          
-        self.word_freq = Counter(self.words)  
+    def compute_word_frequency(self):
+        # Keep words of 3+ characters that are not stop words; store the counts on the agent
+        self.word_freq = Counter(w for w in self.words if w not in stop_words and len(w) >= 3)
   
     def get_word_frequency(self):  
         return self.word_freq
 
     
 # 将文本分割成多个部分，并为每个部分创建一个Agent
-def create_agents(words, num_agents = 4 ):  
-    text_chunks = [ words[i::num_agents] for i in range(num_agents) ]  
-    agents = [ WordFrequencyAgent(chunk) for chunk in text_chunks ]
-    return agents    
-
+def create_agents( words ):
+    return list(map(WordFrequencyAgent, words))  # one agent per element (chunk) of words
 
 def compute_all_word_frequencies(agents):  
     with concurrent.futures.ThreadPoolExecutor() as executor:  
@@ -34,7 +33,6 @@ def compute_all_word_frequencies(agents):
             agent = future_to_agent[future]  
             data = future.result()   # 词频被保存在agent中
 
-
 # 所有Agent计算完成后，合并它们的词频结果
 def merge_word_frequencies(agents):  
     merged_freq = Counter()  
@@ -42,10 +40,13 @@ def merge_word_frequencies(agents):
         merged_freq.update(agent.get_word_frequency())  
     return merged_freq  
 
-
-if __name__ == '__main__':       
-    words = util.extract_file_words(util.testfilepath) # 从文本抽词
+@util.timing_decorator
+def main():   
+    words = util.get_chunks(util.testfilepath)
     agents = create_agents(words)  # 创建代理
     compute_all_word_frequencies(agents)  # 计算
     merged_word_freq = merge_word_frequencies(agents)   # 合并结果    
-    util.print_word_freqs(merged_word_freq.most_common(10))  # 排序输出    
\ No newline at end of file
+    util.print_word_freqs(merged_word_freq.most_common(10))  # 排序输出  
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/15 工程化/松耦合/restful/tf-35-app.py b/15 工程化/松耦合/restful/app.py
similarity index 100%
rename from 15 工程化/松耦合/restful/tf-35-app.py
rename to 15 工程化/松耦合/restful/app.py
diff --git a/15 工程化/松耦合/restful/tf-35-request.py b/15 工程化/松耦合/restful/client.py
similarity index 100%
rename from 15 工程化/松耦合/restful/tf-35-request.py
rename to 15 工程化/松耦合/restful/client.py
diff --git a/16 其它/对象设计模式/观察者.py b/16 其它/对象设计模式/观察者.py
index f2b8ef3..28628c4 100644
--- a/16 其它/对象设计模式/观察者.py	
+++ b/16 其它/对象设计模式/观察者.py	
@@ -45,7 +45,7 @@ def main(testfilepath, top_n = 10 ):
     wordlist = re_split( read_file(testfilepath) )
     for word in wordlist:  
         if word not in stopwords:  
-            subject.notify(word)  
+            subject.notify(word)  # 触发
   
     # 打印最高的N个词频  
     top_words = observer.get_top_n(top_n)  
diff --git a/30 对象设计模式/结构型/170 桥接.py b/30 对象设计模式/结构型/170 桥接.py
index 3ab3a0f..f95ff8f 100644
--- a/30 对象设计模式/结构型/170 桥接.py	
+++ b/30 对象设计模式/结构型/170 桥接.py	
@@ -84,9 +84,11 @@ if __name__ == "__main__":
 
 
 '''
-在这个示例中，IBook 是一个接口，定义了书籍应有的行为（比如获取标题和作者）。NovelBook 是一个具体书籍类，实现了 IBook 接口。BookCategory 是一个书籍分类类，它可以包含多个书籍实例。
-
-DisplayPlatform 是一个抽象展示平台类，定义了如何展示书籍。WebDisplayPlatform 和 MobileDisplayPlatform 是具体展示平台类，分别实现了 DisplayPlatform 接口，以提供不同的展示方式。
-
+在这个示例中，
+IBook 是一个接口，定义了书籍应有的行为（比如获取标题和作者）。
+NovelBook 是一个具体书籍类，实现了 IBook 接口。
+BookCategory 是一个书籍分类类，它可以包含多个书籍实例。
+DisplayPlatform 是一个抽象展示平台类，定义了如何展示书籍。
+WebDisplayPlatform 和 MobileDisplayPlatform 是具体展示平台类，分别实现了 DisplayPlatform 接口，以提供不同的展示方式。
 BookShop 是一个桥接类，它将书籍分类与展示平台连接起来，通过 show_books 方法可以展示分类中的所有书籍。
 '''    
\ No newline at end of file
diff --git a/cppy_/cp_util.py b/cppy_/cp_util.py
index 206ab37..6554c2c 100644
--- a/cppy_/cp_util.py
+++ b/cppy_/cp_util.py
@@ -39,6 +39,13 @@ def get_stopwords( path_to_file = stopwordfilepath ):
     data.extend(list(string.ascii_lowercase))
     return data
 
+def get_chunks( file_path = testfilepath, chunk_size = 1000):
+    """Split the re_split() output for file_path into lists of at most chunk_size items."""
+    if chunk_size < 1:  # a negative step would silently produce no chunks
+        raise ValueError("chunk_size must be a positive integer")
+    content = re_split(read_file(file_path))
+    return [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
+
 def extract_file_words(path_to_file):
     word_list = re_split( read_file(path_to_file) )
     stop_words = get_stopwords()