diff --git a/12 语言特性/递归/tf-08.py b/12 语言特性/递归/tf-08.py index 2a56d2c..bd228c8 100644 --- a/12 语言特性/递归/tf-08.py +++ b/12 语言特性/递归/tf-08.py @@ -1,6 +1,8 @@ import sys from cppy.cp_util import * +## 切分任务这个工作,可以统一为一个通用函数。做成一个生成器 + script_dir = os.path.dirname(os.path.abspath(__file__)) testfile = os.path.join(script_dir, 'test.txt') stop_words = get_stopwords() diff --git a/13 计算设备/map-reduce/tf-31.py b/13 计算设备/任务平行分发/mapreduce.py similarity index 100% rename from 13 计算设备/map-reduce/tf-31.py rename to 13 计算设备/任务平行分发/mapreduce.py diff --git a/13 计算设备/map-reduce/tf-91.py b/13 计算设备/任务平行分发/多线程1.py similarity index 100% rename from 13 计算设备/map-reduce/tf-91.py rename to 13 计算设备/任务平行分发/多线程1.py diff --git a/11 概念认知/松耦合/Agent/80.py b/13 计算设备/任务平行分发/多线程2.py similarity index 93% rename from 11 概念认知/松耦合/Agent/80.py rename to 13 计算设备/任务平行分发/多线程2.py index 80ae836..2485c0f 100644 --- a/11 概念认知/松耦合/Agent/80.py +++ b/13 计算设备/任务平行分发/多线程2.py @@ -2,6 +2,7 @@ import concurrent.futures from collections import Counter import cppy.cp_util as util +# 价值不大,就是多线程的一个表现,说明松耦合不如消息驱动的组件 class WordFrequencyAgent: def __init__(self, words): diff --git a/13 计算设备/map-reduce/tf-92.py b/13 计算设备/任务平行分发/多进程.py similarity index 100% rename from 13 计算设备/map-reduce/tf-92.py rename to 13 计算设备/任务平行分发/多进程.py diff --git a/13 计算设备/极限内存/tf-01.py b/13 计算设备/极限内存/tf-01.py deleted file mode 100644 index da97aac..0000000 --- a/13 计算设备/极限内存/tf-01.py +++ /dev/null @@ -1,109 +0,0 @@ -import sys, os, string -from cppy.cp_util import * - -def touchopen(filename, *args, **kwargs): - try: - os.remove(filename) - except OSError: - pass - open(filename, "a",encoding='utf-8').close() # "touch" file - return open(filename, *args, **kwargs) - -# The constrained memory should have no more than 1024*n cells -data = [] -n = 10 - -f = open( stopwordfilepath,encoding='utf-8' ) -data = [f.read(1024*n).split(',')] # data[0] holds the stop words -f.close() - -data.append([]) # data[1] is line (max 80 characters) -data.append(None) # data[2] is index of the start_char of word -data.append(0) # data[3] is index on characters, i = 0 -data.append(False) # data[4] is flag indicating if word was found -data.append('') # data[5] is the word -data.append('') # data[6] is word,NNNN -data.append(0) # data[7] is frequency - -# Open the secondary memory -word_freqs = touchopen('word_freqs', 'rb+') -# Open the input file -f = open( testfilepath , 'r',encoding='utf-8') -# Loop over input file's lines -while True: - print('.',end='',flush = True) - data[1] = [f.readline()] - if data[1] == ['']: # end of input file - break - if data[1][0][len(data[1][0])-1] != '\n': # If it does not end with \n - data[1][0] = data[1][0] + '\n' # Add \n - data[2] = None - data[3] = 0 - # Loop over characters in the line - for c in data[1][0]: # elimination of symbol c is exercise - if data[2] == None: - if c.isalnum(): - # We found the start of a word - data[2] = data[3] - else: - if not c.isalnum(): - # We found the end of a word. Process it - data[4] = False - data[5] = data[1][0][data[2]:data[3]].lower() - # Ignore words with len < 2, and stop words - if len(data[5]) >= 2 and data[5] not in data[0]: - # Let's see if it already exists - while True: - data[6] = str(word_freqs.readline().strip(), 'utf-8') - if data[6] == '': - break; - data[7] = int(data[6].split(',')[1]) - # word, no white space - data[6] = data[6].split(',')[0].strip() - if data[5] == data[6]: - data[7] += 1 - data[4] = True - break - if not data[4]: - word_freqs.seek(0, 1) # Needed in Windows - word_freqs.write(bytes("%20s,%04d\n" % (data[5], 1), 'utf-8')) - else: - word_freqs.seek(-26, 1) - word_freqs.write(bytes("%20s,%04d\n" % (data[5], data[7]), 'utf-8')) - word_freqs.seek(0,0) - # Let's reset - data[2] = None - data[3] += 1 -# We're done with the input file -f.close() -word_freqs.flush() - -# PART 2 -# Now we need to find the 25 most frequently occurring words. -# We don't need anything from the previous values in memory -del data[:] - -# Let's use the first 25 entries for the top 25 words -data = data + [[]]*(25 - len(data)) -data.append('') # data[25] is word,freq from file -data.append(0) # data[26] is freq - -# Loop over secondary memory file -while True: - data[25] = str(word_freqs.readline().strip(), 'utf-8') - if data[25] == '': # EOF - break - data[26] = int(data[25].split(',')[1]) # Read it as integer - data[25] = data[25].split(',')[0].strip() # word - # Check if this word has more counts than the ones in memory - for i in range(25): # elimination of symbol i is exercise - if data[i] == [] or data[i][1] < data[26]: - data.insert(i, [data[25], data[26]]) - del data[26] # delete the last element - break - -for tf in data[0:10]: - if len(tf) == 2: - print(tf[0], '-', tf[1]) - -word_freqs.close() \ No newline at end of file diff --git a/11 概念认知/松耦合/restful/tf-35-app.py b/15 工程化/松耦合/restful/tf-35-app.py similarity index 100% rename from 11 概念认知/松耦合/restful/tf-35-app.py rename to 15 工程化/松耦合/restful/tf-35-app.py diff --git a/11 概念认知/松耦合/restful/tf-35-request.py b/15 工程化/松耦合/restful/tf-35-request.py similarity index 100% rename from 11 概念认知/松耦合/restful/tf-35-request.py rename to 15 工程化/松耦合/restful/tf-35-request.py diff --git a/11 概念认知/松耦合/插件/__pycache__/tf-20.cpython-38.pyc b/15 工程化/松耦合/插件/__pycache__/tf-20.cpython-38.pyc similarity index 100% rename from 11 概念认知/松耦合/插件/__pycache__/tf-20.cpython-38.pyc rename to 15 工程化/松耦合/插件/__pycache__/tf-20.cpython-38.pyc diff --git a/11 概念认知/松耦合/插件/config.ini b/15 工程化/松耦合/插件/config.ini similarity index 100% rename from 11 概念认知/松耦合/插件/config.ini rename to 15 工程化/松耦合/插件/config.ini diff --git a/11 概念认知/松耦合/插件/plugins-src/compile.sh b/15 工程化/松耦合/插件/plugins-src/compile.sh similarity index 100% rename from 11 概念认知/松耦合/插件/plugins-src/compile.sh rename to 15 工程化/松耦合/插件/plugins-src/compile.sh diff --git a/11 概念认知/松耦合/插件/plugins-src/frequencies1.py b/15 工程化/松耦合/插件/plugins-src/frequencies1.py similarity index 100% rename from 11 概念认知/松耦合/插件/plugins-src/frequencies1.py rename to 15 工程化/松耦合/插件/plugins-src/frequencies1.py diff --git a/11 概念认知/松耦合/插件/plugins-src/frequencies2.py b/15 工程化/松耦合/插件/plugins-src/frequencies2.py similarity index 100% rename from 11 概念认知/松耦合/插件/plugins-src/frequencies2.py rename to 15 工程化/松耦合/插件/plugins-src/frequencies2.py diff --git a/11 概念认知/松耦合/插件/plugins-src/words1.py b/15 工程化/松耦合/插件/plugins-src/words1.py similarity index 100% rename from 11 概念认知/松耦合/插件/plugins-src/words1.py rename to 15 工程化/松耦合/插件/plugins-src/words1.py diff --git a/11 概念认知/松耦合/插件/plugins-src/words2.py b/15 工程化/松耦合/插件/plugins-src/words2.py similarity index 100% rename from 11 概念认知/松耦合/插件/plugins-src/words2.py rename to 15 工程化/松耦合/插件/plugins-src/words2.py diff --git a/11 概念认知/松耦合/插件/plugins/frequencies1.pyc b/15 工程化/松耦合/插件/plugins/frequencies1.pyc similarity index 100% rename from 11 概念认知/松耦合/插件/plugins/frequencies1.pyc rename to 15 工程化/松耦合/插件/plugins/frequencies1.pyc diff --git a/11 概念认知/松耦合/插件/plugins/frequencies2.cpython-38.pyc b/15 工程化/松耦合/插件/plugins/frequencies2.cpython-38.pyc similarity index 100% rename from 11 概念认知/松耦合/插件/plugins/frequencies2.cpython-38.pyc rename to 15 工程化/松耦合/插件/plugins/frequencies2.cpython-38.pyc diff --git a/11 概念认知/松耦合/插件/plugins/words1.pyc b/15 工程化/松耦合/插件/plugins/words1.pyc similarity index 100% rename from 11 概念认知/松耦合/插件/plugins/words1.pyc rename to 15 工程化/松耦合/插件/plugins/words1.pyc diff --git a/11 概念认知/松耦合/插件/plugins/words2.cpython-38.pyc b/15 工程化/松耦合/插件/plugins/words2.cpython-38.pyc similarity index 100% rename from 11 概念认知/松耦合/插件/plugins/words2.cpython-38.pyc rename to 15 工程化/松耦合/插件/plugins/words2.cpython-38.pyc diff --git a/11 概念认知/松耦合/插件/tf-20.py b/15 工程化/松耦合/插件/tf-20.py similarity index 100% rename from 11 概念认知/松耦合/插件/tf-20.py rename to 15 工程化/松耦合/插件/tf-20.py diff --git a/16 其它/对象设计模式/注册回调/tf-15.py b/15 工程化/松耦合/注册回调.py similarity index 90% rename from 16 其它/对象设计模式/注册回调/tf-15.py rename to 15 工程化/松耦合/注册回调.py index b76b4c5..c4095b9 100644 --- a/16 其它/对象设计模式/注册回调/tf-15.py +++ b/15 工程化/松耦合/注册回调.py @@ -1,3 +1,10 @@ +''' +每个组件提供注册消息接口和注册消息动作 +把顺序的流程分解到各个组件内部实现 +这样避免回传到中心控制器 +这是一个示例性质的原型,具体分布式环境下需要调整 +''' + from collections import defaultdict from cppy.cp_util import * @@ -81,4 +88,5 @@ if __name__ == '__main__': stop_word_filter = StopWordFilter(wfapp) data_storage = DataStorage(wfapp, stop_word_filter) word_freq_counter = WordFrequencyCounter(wfapp, data_storage) + wfapp.run(testfilepath) \ No newline at end of file diff --git a/16 其它/对象设计模式/注册回调_订阅制/tf-16.py b/15 工程化/松耦合/消息订阅.py similarity index 92% rename from 16 其它/对象设计模式/注册回调_订阅制/tf-16.py rename to 15 工程化/松耦合/消息订阅.py index 29de10a..f638f42 100644 --- a/16 其它/对象设计模式/注册回调_订阅制/tf-16.py +++ b/15 工程化/松耦合/消息订阅.py @@ -1,4 +1,10 @@ from cppy.cp_util import * +''' +注册回调的一个变体 +提供一个中心消息管理器,统一管理消息的订阅和通知 +这是一个示例性质的原型,具体分布式环境下需要调整 +''' + ################################################# # The event management diff --git a/16 其它/对象设计模式/消息驱动的对象/tf-12.py b/15 工程化/松耦合/消息驱动的组件/1 指令驱动.py similarity index 75% rename from 16 其它/对象设计模式/消息驱动的对象/tf-12.py rename to 15 工程化/松耦合/消息驱动的组件/1 指令驱动.py index f859749..d881a63 100644 --- a/16 其它/对象设计模式/消息驱动的对象/tf-12.py +++ b/15 工程化/松耦合/消息驱动的组件/1 指令驱动.py @@ -1,4 +1,7 @@ -import sys, re, operator, string +''' +依靠给各个组件的 dispatch 调用接口发指令来驱动所有工作 +这是一个示例性质的原型,具体环境下需要调整 +''' from cppy.cp_util import * class DataStorageManager(): @@ -13,19 +16,9 @@ class DataStorageManager(): return self._words() else: raise Exception("Message not understood " + message[0]) - # 使用内省的写法 - ''' - def dispatch(self, message): - method_name = '_' + message[0] - if hasattr(self, method_name): - method = getattr(self, method_name) - return method(*message[1:]) - else: - raise ValueError(f"DataStorageManager doesn't understand message {message[0]}") - ''' def _init(self, path_to_file): - self._data = re.findall('\w+', read_file(path_to_file).lower()) + self._data = re_split( read_file(path_to_file) ) def _words(self): return self._data @@ -62,14 +55,12 @@ class WordFrequencyManager(): else: raise Exception("Message not understood " + message[0]) - def _increment_count(self, word): - if word in self._word_freqs: - self._word_freqs[word] += 1 - else: - self._word_freqs[word] = 1 - + def _increment_count(self, word): + self._word_freqs[word] = self._word_freqs.get(word,0) + 1 + def _sorted(self): - return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True) + return sort_dict(self._word_freqs) + class WordFrequencyController(): @@ -94,8 +85,7 @@ class WordFrequencyController(): self._word_freq_manager.dispatch(['increment_count', w]) word_freqs = self._word_freq_manager.dispatch(['sorted']) - for (w, c) in word_freqs[0:10]: - print(w, '-', c) + print_word_freqs(word_freqs) if __name__ == '__main__': diff --git a/16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py b/15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py similarity index 72% rename from 16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py rename to 15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py index eee2f62..5530a6e 100644 --- a/16 其它/对象设计模式/消息驱动的对象_线程独立/tf-29.py +++ b/15 工程化/松耦合/消息驱动的组件/2 仅有消息接口.py @@ -1,3 +1,9 @@ +''' +依靠给各个不同线程组件的队列发指令来驱动所有工作,比较繁琐。 +比较 01.py 的实现,各个组件完全不能互操作,仅依靠队列发消息进行协作 +这是一个示例性质的原型,具体分布式环境下需要调整 +''' + from threading import Thread from queue import Queue from cppy.cp_util import * @@ -13,8 +19,8 @@ class ActiveWFObject(Thread): while not self._stopMe: message = self.queue.get() self._dispatch(message) - if message[0] == 'die': - self._stopMe = True + if message[0] == 'over': + break def send(receiver, message): receiver.queue.put(message) @@ -33,15 +39,14 @@ class DataStorageManager(ActiveWFObject): send(self._stop_word_manager, message) def _init(self, message): - path_to_file = message[0] - self._stop_word_manager = message[1] - self._data = extract_file_words(path_to_file) + self._data = extract_file_words(message[0]) + self._stop_word_manager = message[1] def _process_words(self, message): recipient = message[0] for w in self._data: send(self._stop_word_manager, ['filter', w]) - send(self._stop_word_manager, ['top10', recipient]) + send(self._stop_word_manager, ['topWord', recipient]) class StopWordManager(ActiveWFObject): """ Models the stop word filter """ @@ -72,24 +77,24 @@ class WordFrequencyManager(ActiveWFObject): def _dispatch(self, message): if message[0] == 'word': self._increment_count(message[1:]) - elif message[0] == 'top10': - self._top10(message[1:]) + elif message[0] == 'topWord': + self._topWord(message[1:]) def _increment_count(self, message): word, = message self._word_freqs[word] = self._word_freqs.get(word, 0) + 1 - def _top10(self, message): + def _topWord(self, message): recipient = message[0] freqs_sorted = sort_dict ( self._word_freqs ) - send(recipient, ['top10', freqs_sorted]) + send(recipient, ['topWord', freqs_sorted]) -class WordFrequencyController(ActiveWFObject): +class MyController(ActiveWFObject): def _dispatch(self, message): if message[0] == 'run': self._run(message[1:]) - elif message[0] == 'top10': + elif message[0] == 'topWord': self._display(message[1:]) else: raise Exception("Message not understood " + message[0]) @@ -101,20 +106,20 @@ class WordFrequencyController(ActiveWFObject): def _display(self, message): word_freqs, = message print_word_freqs( word_freqs) - send(self._storage_manager, ['die']) + send(self._storage_manager, ['over']) self._stopMe = True -if __name__ == '__main__': +if __name__ == '__main__': word_freq_manager = WordFrequencyManager() stop_word_manager = StopWordManager() storage_manager = DataStorageManager() + wfcontroller = MyController() - send(stop_word_manager, ['init', word_freq_manager]) send(storage_manager, ['init', testfilepath, stop_word_manager]) - - wfcontroller = WordFrequencyController() + send(stop_word_manager, ['init', word_freq_manager]) send(wfcontroller, ['run', storage_manager]) - # Wait for the active objects to finish - [t.join() for t in [word_freq_manager, stop_word_manager, storage_manager, wfcontroller]] \ No newline at end of file + # 等待所有管理器完成工作 + threads = [word_freq_manager, stop_word_manager, storage_manager, wfcontroller] + for thread in threads: thread.join() \ No newline at end of file diff --git a/16 其它/函数缓存/84.py b/16 其它/函数缓存.py similarity index 100% rename from 16 其它/函数缓存/84.py rename to 16 其它/函数缓存.py