From b86f626e947075e3a3862674bac9deec2bc57020 Mon Sep 17 00:00:00 2001
From: zj3D <flysmart.ww@qq.com>
Date: Tue, 19 Mar 2024 12:03:13 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A4=A7=E8=B0=83=E6=95=B42?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 12 语言特性/递归/tf-08.py               |   2 +
 .../mapreduce.py}                             |   0
 .../多线程1.py}                            |   0
 .../任务平行分发/多线程2.py          |   1 +
 .../多进程.py}                             |   0
 13 计算设备/极限内存/tf-01.py         | 109 ------------------
 .../松耦合/restful/tf-35-app.py            |   0
 .../松耦合/restful/tf-35-request.py        |   0
 .../插件/__pycache__/tf-20.cpython-38.pyc   | Bin
 .../松耦合/插件/config.ini               |   0
 .../松耦合/插件/plugins-src/compile.sh   |   0
 .../插件/plugins-src/frequencies1.py        |   0
 .../插件/plugins-src/frequencies2.py        |   0
 .../松耦合/插件/plugins-src/words1.py    |   0
 .../松耦合/插件/plugins-src/words2.py    |   0
 .../松耦合/插件/plugins/frequencies1.pyc | Bin
 .../plugins/frequencies2.cpython-38.pyc       | Bin
 .../松耦合/插件/plugins/words1.pyc       | Bin
 .../插件/plugins/words2.cpython-38.pyc      | Bin
 .../松耦合/插件/tf-20.py                 |   0
 .../松耦合/注册回调.py                 |   8 ++
 .../松耦合/消息订阅.py                 |   6 +
 .../消息驱动的组件/1 指令驱动.py   |  32 ++---
 .../2 仅有消息接口.py                   |  43 ++++---
 .../{函数缓存/84.py => 函数缓存.py}   |   0
 25 files changed, 52 insertions(+), 149 deletions(-)
 rename 13 计算设备/{map-reduce/tf-31.py => 任务平行分发/mapreduce.py} (100%)
 rename 13 计算设备/{map-reduce/tf-91.py => 任务平行分发/多线程1.py} (100%)
 rename 11 概念认知/松耦合/Agent/80.py => 13 计算设备/任务平行分发/多线程2.py (93%)
 rename 13 计算设备/{map-reduce/tf-92.py => 任务平行分发/多进程.py} (100%)
 delete mode 100644 13 计算设备/极限内存/tf-01.py
 rename {11 概念认知 => 15 工程化}/松耦合/restful/tf-35-app.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/restful/tf-35-request.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/__pycache__/tf-20.cpython-38.pyc (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/config.ini (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins-src/compile.sh (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins-src/frequencies1.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins-src/frequencies2.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins-src/words1.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins-src/words2.py (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins/frequencies1.pyc (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins/frequencies2.cpython-38.pyc (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins/words1.pyc (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/plugins/words2.cpython-38.pyc (100%)
 rename {11 概念认知 => 15 工程化}/松耦合/插件/tf-20.py (100%)
 rename 16 其它/对象设计模式/注册回调/tf-15.py => 15 工程化/松耦合/注册回调.py (90%)
 rename 16 其它/对象设计模式/注册回调_订阅制/tf-16.py => 15 工程化/松耦合/消息订阅.py (92%)
 rename 16 其它/对象设计模式/消息驱动的对象/tf-12.py => 15 工程化/松耦合/消息驱动的组件/1 指令驱动.py (75%)
 rename 16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py => 15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py (72%)
 rename 16 其它/{函数缓存/84.py => 函数缓存.py} (100%)

diff --git a/12 语言特性/递归/tf-08.py b/12 语言特性/递归/tf-08.py
index 2a56d2c..bd228c8 100644
--- a/12 语言特性/递归/tf-08.py	
+++ b/12 语言特性/递归/tf-08.py	
@@ -1,6 +1,8 @@
 import sys
 from cppy.cp_util import *
 
+## 切分任务这个工作，可以统一为一个通用函数。做成一个生成器
+
 script_dir = os.path.dirname(os.path.abspath(__file__))  
 testfile = os.path.join(script_dir, 'test.txt')
 stop_words = get_stopwords()
diff --git a/13 计算设备/map-reduce/tf-31.py b/13 计算设备/任务平行分发/mapreduce.py
similarity index 100%
rename from 13 计算设备/map-reduce/tf-31.py
rename to 13 计算设备/任务平行分发/mapreduce.py
diff --git a/13 计算设备/map-reduce/tf-91.py b/13 计算设备/任务平行分发/多线程1.py
similarity index 100%
rename from 13 计算设备/map-reduce/tf-91.py
rename to 13 计算设备/任务平行分发/多线程1.py
diff --git a/11 概念认知/松耦合/Agent/80.py b/13 计算设备/任务平行分发/多线程2.py
similarity index 93%
rename from 11 概念认知/松耦合/Agent/80.py
rename to 13 计算设备/任务平行分发/多线程2.py
index 80ae836..2485c0f 100644
--- a/11 概念认知/松耦合/Agent/80.py	
+++ b/13 计算设备/任务平行分发/多线程2.py	
@@ -2,6 +2,7 @@ import concurrent.futures
 from collections import Counter  
 import cppy.cp_util as util
 
+# 价值不大，就是多线程的一个表现，说明松耦合不如消息驱动的组件
 
 class WordFrequencyAgent:  
     def __init__(self, words):          
diff --git a/13 计算设备/map-reduce/tf-92.py b/13 计算设备/任务平行分发/多进程.py
similarity index 100%
rename from 13 计算设备/map-reduce/tf-92.py
rename to 13 计算设备/任务平行分发/多进程.py
diff --git a/13 计算设备/极限内存/tf-01.py b/13 计算设备/极限内存/tf-01.py
deleted file mode 100644
index da97aac..0000000
--- a/13 计算设备/极限内存/tf-01.py	
+++ /dev/null
@@ -1,109 +0,0 @@
-import sys, os, string
-from cppy.cp_util import *
-
-def touchopen(filename, *args, **kwargs):
-    try:
-        os.remove(filename)
-    except OSError:
-        pass
-    open(filename, "a",encoding='utf-8').close() # "touch" file
-    return open(filename, *args, **kwargs)
-
-# The constrained memory should have no more than 1024*n cells
-data = []
-n = 10
-
-f = open( stopwordfilepath,encoding='utf-8' )
-data = [f.read(1024*n).split(',')] # data[0] holds the stop words
-f.close()
-
-data.append([])    # data[1] is line (max 80 characters)
-data.append(None)  # data[2] is index of the start_char of word
-data.append(0)     # data[3] is index on characters, i = 0
-data.append(False) # data[4] is flag indicating if word was found
-data.append('')    # data[5] is the word
-data.append('')    # data[6] is word,NNNN
-data.append(0)     # data[7] is frequency
-
-# Open the secondary memory
-word_freqs = touchopen('word_freqs', 'rb+')
-# Open the input file
-f = open( testfilepath , 'r',encoding='utf-8')
-# Loop over input file's lines
-while True:
-    print('.',end='',flush = True)
-    data[1] = [f.readline()] 
-    if data[1] == ['']: # end of input file
-        break
-    if data[1][0][len(data[1][0])-1] != '\n': # If it does not end with \n
-        data[1][0] = data[1][0] + '\n' # Add \n
-    data[2] = None
-    data[3] = 0 
-    # Loop over characters in the line
-    for c in data[1][0]: # elimination of symbol c is exercise
-        if data[2] == None:
-            if c.isalnum():
-                # We found the start of a word
-                data[2] = data[3]
-        else:
-            if not c.isalnum():
-                # We found the end of a word. Process it
-                data[4] = False 
-                data[5] = data[1][0][data[2]:data[3]].lower()
-                # Ignore words with len < 2, and stop words
-                if len(data[5]) >= 2 and data[5] not in data[0]:
-                    # Let's see if it already exists
-                    while True:
-                        data[6] = str(word_freqs.readline().strip(), 'utf-8')
-                        if data[6] == '':
-                            break;
-                        data[7] = int(data[6].split(',')[1])
-                        # word, no white space
-                        data[6] = data[6].split(',')[0].strip() 
-                        if data[5] == data[6]:
-                            data[7] += 1
-                            data[4] = True
-                            break
-                    if not data[4]:
-                        word_freqs.seek(0, 1) # Needed in Windows
-                        word_freqs.write(bytes("%20s,%04d\n" % (data[5], 1), 'utf-8'))
-                    else:
-                        word_freqs.seek(-26, 1)
-                        word_freqs.write(bytes("%20s,%04d\n" % (data[5], data[7]), 'utf-8'))
-                    word_freqs.seek(0,0)
-                # Let's reset
-                data[2] = None
-        data[3] += 1
-# We're done with the input file
-f.close()
-word_freqs.flush()
-
-# PART 2
-# Now we need to find the 25 most frequently occurring words.
-# We don't need anything from the previous values in memory
-del data[:]
-
-# Let's use the first 25 entries for the top 25 words
-data = data + [[]]*(25 - len(data))
-data.append('') # data[25] is word,freq from file
-data.append(0)  # data[26] is freq
-
-# Loop over secondary memory file
-while True:
-    data[25] = str(word_freqs.readline().strip(), 'utf-8')
-    if data[25] == '': # EOF
-        break
-    data[26] = int(data[25].split(',')[1]) # Read it as integer
-    data[25] = data[25].split(',')[0].strip() # word
-    # Check if this word has more counts than the ones in memory
-    for i in range(25): # elimination of symbol i is exercise
-        if data[i] == [] or data[i][1] < data[26]:
-            data.insert(i, [data[25], data[26]]) 
-            del data[26] #  delete the last element
-            break
-            
-for tf in data[0:10]: 
-    if len(tf) == 2:
-        print(tf[0], '-', tf[1])
-
-word_freqs.close()
\ No newline at end of file
diff --git a/11 概念认知/松耦合/restful/tf-35-app.py b/15 工程化/松耦合/restful/tf-35-app.py
similarity index 100%
rename from 11 概念认知/松耦合/restful/tf-35-app.py
rename to 15 工程化/松耦合/restful/tf-35-app.py
diff --git a/11 概念认知/松耦合/restful/tf-35-request.py b/15 工程化/松耦合/restful/tf-35-request.py
similarity index 100%
rename from 11 概念认知/松耦合/restful/tf-35-request.py
rename to 15 工程化/松耦合/restful/tf-35-request.py
diff --git a/11 概念认知/松耦合/插件/__pycache__/tf-20.cpython-38.pyc b/15 工程化/松耦合/插件/__pycache__/tf-20.cpython-38.pyc
similarity index 100%
rename from 11 概念认知/松耦合/插件/__pycache__/tf-20.cpython-38.pyc
rename to 15 工程化/松耦合/插件/__pycache__/tf-20.cpython-38.pyc
diff --git a/11 概念认知/松耦合/插件/config.ini b/15 工程化/松耦合/插件/config.ini
similarity index 100%
rename from 11 概念认知/松耦合/插件/config.ini
rename to 15 工程化/松耦合/插件/config.ini
diff --git a/11 概念认知/松耦合/插件/plugins-src/compile.sh b/15 工程化/松耦合/插件/plugins-src/compile.sh
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins-src/compile.sh
rename to 15 工程化/松耦合/插件/plugins-src/compile.sh
diff --git a/11 概念认知/松耦合/插件/plugins-src/frequencies1.py b/15 工程化/松耦合/插件/plugins-src/frequencies1.py
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins-src/frequencies1.py
rename to 15 工程化/松耦合/插件/plugins-src/frequencies1.py
diff --git a/11 概念认知/松耦合/插件/plugins-src/frequencies2.py b/15 工程化/松耦合/插件/plugins-src/frequencies2.py
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins-src/frequencies2.py
rename to 15 工程化/松耦合/插件/plugins-src/frequencies2.py
diff --git a/11 概念认知/松耦合/插件/plugins-src/words1.py b/15 工程化/松耦合/插件/plugins-src/words1.py
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins-src/words1.py
rename to 15 工程化/松耦合/插件/plugins-src/words1.py
diff --git a/11 概念认知/松耦合/插件/plugins-src/words2.py b/15 工程化/松耦合/插件/plugins-src/words2.py
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins-src/words2.py
rename to 15 工程化/松耦合/插件/plugins-src/words2.py
diff --git a/11 概念认知/松耦合/插件/plugins/frequencies1.pyc b/15 工程化/松耦合/插件/plugins/frequencies1.pyc
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins/frequencies1.pyc
rename to 15 工程化/松耦合/插件/plugins/frequencies1.pyc
diff --git a/11 概念认知/松耦合/插件/plugins/frequencies2.cpython-38.pyc b/15 工程化/松耦合/插件/plugins/frequencies2.cpython-38.pyc
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins/frequencies2.cpython-38.pyc
rename to 15 工程化/松耦合/插件/plugins/frequencies2.cpython-38.pyc
diff --git a/11 概念认知/松耦合/插件/plugins/words1.pyc b/15 工程化/松耦合/插件/plugins/words1.pyc
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins/words1.pyc
rename to 15 工程化/松耦合/插件/plugins/words1.pyc
diff --git a/11 概念认知/松耦合/插件/plugins/words2.cpython-38.pyc b/15 工程化/松耦合/插件/plugins/words2.cpython-38.pyc
similarity index 100%
rename from 11 概念认知/松耦合/插件/plugins/words2.cpython-38.pyc
rename to 15 工程化/松耦合/插件/plugins/words2.cpython-38.pyc
diff --git a/11 概念认知/松耦合/插件/tf-20.py b/15 工程化/松耦合/插件/tf-20.py
similarity index 100%
rename from 11 概念认知/松耦合/插件/tf-20.py
rename to 15 工程化/松耦合/插件/tf-20.py
diff --git a/16 其它/对象设计模式/注册回调/tf-15.py b/15 工程化/松耦合/注册回调.py
similarity index 90%
rename from 16 其它/对象设计模式/注册回调/tf-15.py
rename to 15 工程化/松耦合/注册回调.py
index b76b4c5..c4095b9 100644
--- a/16 其它/对象设计模式/注册回调/tf-15.py	
+++ b/15 工程化/松耦合/注册回调.py	
@@ -1,3 +1,10 @@
+'''
+每个组件提供注册消息接口和注册消息动作
+把顺序的流程分解到各个组件内部实现
+这样避免回传到中心控制器
+这是一个示例性质的原型，具体分布式环境下需要调整
+'''
+
 from collections import defaultdict
 from cppy.cp_util import *
 
@@ -81,4 +88,5 @@ if __name__ == '__main__':
     stop_word_filter = StopWordFilter(wfapp)
     data_storage = DataStorage(wfapp, stop_word_filter)
     word_freq_counter = WordFrequencyCounter(wfapp, data_storage)
+
     wfapp.run(testfilepath)
\ No newline at end of file
diff --git a/16 其它/对象设计模式/注册回调_订阅制/tf-16.py b/15 工程化/松耦合/消息订阅.py
similarity index 92%
rename from 16 其它/对象设计模式/注册回调_订阅制/tf-16.py
rename to 15 工程化/松耦合/消息订阅.py
index 29de10a..f638f42 100644
--- a/16 其它/对象设计模式/注册回调_订阅制/tf-16.py	
+++ b/15 工程化/松耦合/消息订阅.py	
@@ -1,4 +1,10 @@
 from cppy.cp_util import *
+'''
+注册回调的一个变体
+提供一个中心消息管理器，统一管理消息的订阅和通知
+这是一个示例性质的原型，具体分布式环境下需要调整
+'''
+
 
 #################################################
 # The event management
diff --git a/16 其它/对象设计模式/消息驱动的对象/tf-12.py b/15 工程化/松耦合/消息驱动的组件/1 指令驱动.py
similarity index 75%
rename from 16 其它/对象设计模式/消息驱动的对象/tf-12.py
rename to 15 工程化/松耦合/消息驱动的组件/1 指令驱动.py
index f859749..d881a63 100644
--- a/16 其它/对象设计模式/消息驱动的对象/tf-12.py	
+++ b/15 工程化/松耦合/消息驱动的组件/1 指令驱动.py	
@@ -1,4 +1,7 @@
-import sys, re, operator, string
+''' 
+依靠给各个组件的 dispatch 调用接口发指令来驱动所有工作
+这是一个示例性质的原型，具体环境下需要调整
+'''
 from cppy.cp_util import *
 
 class DataStorageManager():
@@ -13,19 +16,9 @@ class DataStorageManager():
             return self._words()
         else:
             raise Exception("Message not understood " + message[0])
-    # 使用内省的写法
-    '''
-    def dispatch(self, message):
-        method_name = '_' + message[0]
-        if hasattr(self, method_name):
-            method = getattr(self, method_name)
-            return method(*message[1:])
-        else:
-            raise ValueError(f"DataStorageManager doesn't understand message {message[0]}")
-    '''
 
     def _init(self, path_to_file):
-        self._data = re.findall('\w+', read_file(path_to_file).lower())
+        self._data = re_split( read_file(path_to_file) )
 
     def _words(self):
         return self._data
@@ -62,14 +55,12 @@ class WordFrequencyManager():
         else:
             raise Exception("Message not understood " + message[0])
  
-    def _increment_count(self, word):
-        if word in self._word_freqs:
-            self._word_freqs[word] += 1
-        else:
-            self._word_freqs[word] = 1
-
+    def _increment_count(self, word):    
+        self._word_freqs[word] = self._word_freqs.get(word,0) + 1
+        
     def _sorted(self):
-        return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
+        return sort_dict(self._word_freqs)
+
 
 class WordFrequencyController():
 
@@ -94,8 +85,7 @@ class WordFrequencyController():
                 self._word_freq_manager.dispatch(['increment_count', w])
 
         word_freqs = self._word_freq_manager.dispatch(['sorted'])
-        for (w, c) in word_freqs[0:10]:
-            print(w, '-', c)
+        print_word_freqs(word_freqs)        
 
 
 if __name__ == '__main__':
diff --git a/16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py b/15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py
similarity index 72%
rename from 16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py
rename to 15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py
index eee2f62..5530a6e 100644
--- a/16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py	
+++ b/15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py	
@@ -1,3 +1,9 @@
+''' 
+依靠给各个不同线程组件的队列发指令来驱动所有工作，比较繁琐。
+比较同目录下 "1 指令驱动.py" 的实现，各个组件完全不能互操作，仅依靠队列发消息进行协作
+这是一个示例性质的原型，具体分布式环境下需要调整
+'''
+
 from threading import Thread
 from queue import Queue
 from cppy.cp_util import *
@@ -13,8 +19,8 @@ class ActiveWFObject(Thread):
         while not self._stopMe:
             message = self.queue.get()
             self._dispatch(message)
-            if message[0] == 'die':
-                self._stopMe = True
+            if message[0] == 'over':
+                break
 
 def send(receiver, message):
     receiver.queue.put(message)
@@ -33,15 +39,14 @@ class DataStorageManager(ActiveWFObject):
             send(self._stop_word_manager, message)
  
     def _init(self, message):
-        path_to_file = message[0]
-        self._stop_word_manager = message[1]    
-        self._data = extract_file_words(path_to_file)
+        self._data = extract_file_words(message[0])
+        self._stop_word_manager = message[1]            
 
     def _process_words(self, message):
         recipient = message[0]        
         for w in self._data:
             send(self._stop_word_manager, ['filter', w])
-        send(self._stop_word_manager, ['top10', recipient])
+        send(self._stop_word_manager, ['topWord', recipient])
 
 class StopWordManager(ActiveWFObject):
     """ Models the stop word filter """
@@ -72,24 +77,24 @@ class WordFrequencyManager(ActiveWFObject):
     def _dispatch(self, message):
         if message[0] == 'word':
             self._increment_count(message[1:])
-        elif message[0] == 'top10':
-            self._top10(message[1:])
+        elif message[0] == 'topWord':
+            self._topWord(message[1:])
  
     def _increment_count(self, message):
         word, = message
         self._word_freqs[word] = self._word_freqs.get(word, 0) + 1
 
-    def _top10(self, message):
+    def _topWord(self, message):
         recipient = message[0]
         freqs_sorted = sort_dict ( self._word_freqs )
-        send(recipient, ['top10', freqs_sorted])
+        send(recipient, ['topWord', freqs_sorted])
 
-class WordFrequencyController(ActiveWFObject):
+class MyController(ActiveWFObject):
 
     def _dispatch(self, message):
         if message[0] == 'run':
             self._run(message[1:])
-        elif message[0] == 'top10':
+        elif message[0] == 'topWord':
             self._display(message[1:])
         else:
             raise Exception("Message not understood " + message[0])
@@ -101,20 +106,20 @@ class WordFrequencyController(ActiveWFObject):
     def _display(self, message):
         word_freqs, = message
         print_word_freqs( word_freqs)        
-        send(self._storage_manager, ['die'])
+        send(self._storage_manager, ['over'])
         self._stopMe = True
 
 
-if __name__ == '__main__':
+if __name__ == '__main__':    
     word_freq_manager = WordFrequencyManager()
     stop_word_manager = StopWordManager()
     storage_manager = DataStorageManager()
+    wfcontroller = MyController()
 
-    send(stop_word_manager, ['init', word_freq_manager])
     send(storage_manager, ['init', testfilepath, stop_word_manager])
-
-    wfcontroller = WordFrequencyController()
+    send(stop_word_manager, ['init', word_freq_manager])        
     send(wfcontroller, ['run', storage_manager])
 
-    # Wait for the active objects to finish
-    [t.join() for t in [word_freq_manager, stop_word_manager, storage_manager, wfcontroller]]
\ No newline at end of file
+    # 等待所有管理器完成工作
+    threads = [word_freq_manager, stop_word_manager, storage_manager, wfcontroller]
+    for thread in threads:  thread.join()
\ No newline at end of file
diff --git a/16 其它/函数缓存/84.py b/16 其它/函数缓存.py
similarity index 100%
rename from 16 其它/函数缓存/84.py
rename to 16 其它/函数缓存.py