commit e5dc492333 by zj3D (pull/1/head)

@ -0,0 +1,46 @@
import string
from cppy.cp_util import *

# Prepare the word list and the stop-word list
word_freqs = []
with open(stopwordfilepath, encoding='utf-8') as f:
    stop_words = f.read().split(',')
stop_words.extend(list(string.ascii_lowercase))

for line in open(testfilepath, encoding='utf-8'):
    start_char = None
    i = 0
    for c in line:
        if start_char is None:
            if c.isalnum():
                # start of a word
                start_char = i
        else:
            if not c.isalnum():
                # end of a word
                found = False
                word = line[start_char:i].lower()
                # skip stop words
                if word not in stop_words:
                    pair_index = 0
                    # has this word been seen before?
                    for pair in word_freqs:
                        if word == pair[0]:
                            pair[1] += 1
                            found = True
                            break
                        pair_index += 1
                    if not found:
                        word_freqs.append([word, 1])
                    elif len(word_freqs) > 1:
                        # bubble the updated entry towards the front
                        for n in reversed(range(pair_index)):
                            if word_freqs[pair_index][1] > word_freqs[n][1]:
                                # swap
                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
                                pair_index = n
                # reset the start marker
                start_char = None
        i += 1

for tf in word_freqs[0:10]:
    print(tf[0], '-', tf[1])

@ -0,0 +1,31 @@
import string
from collections import Counter
from cppy.cp_util import *

# Prepare the stop-word list
stop_words = set(open(stopwordfilepath, encoding='utf-8').read().split(','))
stop_words.update(list(string.ascii_lowercase))

# Read the file and count word frequencies
word_freqs = Counter()
with open(testfilepath, encoding='utf-8') as f:
    for line_num, line in enumerate(f, 1):
        start_char = None
        for i, c in enumerate(line):
            if start_char is None and c.isalnum():
                start_char = i
            elif start_char is not None and not c.isalnum():
                word = line[start_char:i].lower()
                if word not in stop_words:
                    word_freqs[word] += 1
                start_char = None

# Print the 10 most common words
for word, freq in word_freqs.most_common(10):
    print(f"{word}-{freq}")

'''
Compared with A01:
- collections.Counter does the counting, which simplifies the code and improves efficiency.
- enumerate yields line numbers and line contents, and a set holds the stop words; both help performance and readability.
- most_common retrieves the most frequent words, which keeps the output code concise.
'''
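A minimal standalone illustration of the Counter pattern described above (the word list is made-up data):

from collections import Counter
words = ['apple', 'pear', 'apple', 'plum', 'apple', 'pear']
freqs = Counter(words)           # one-pass counting: Counter({'apple': 3, 'pear': 2, 'plum': 1})
print(freqs.most_common(2))      # [('apple', 3), ('pear', 2)]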

@ -0,0 +1,48 @@
import re, operator, string
from cppy.cp_util import *

#
# The functions
#
def extract_words(path_to_file):
    try:
        with open(path_to_file, 'r', encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
        print(f"I/O error({e.errno}) when opening {path_to_file}: {e.strerror}")
        return []
    word_list = re.findall(r'\w+', str_data.lower())
    return word_list

def remove_stop_words(word_list):
    try:
        stop_words = set(get_stopwords())
    except IOError as e:
        print(f"I/O error({e.errno}) when opening stop_words.txt: {e.strerror}")
        return word_list
    stop_words.update(string.ascii_lowercase)
    return [w for w in word_list if w not in stop_words]

def frequencies(word_list):
    if type(word_list) is not list or word_list == []:
        return {}
    word_freqs = {}
    for w in word_list:
        if w in word_freqs:
            word_freqs[w] += 1
        else:
            word_freqs[w] = 1
    return word_freqs

def sort(word_freq):
    if type(word_freq) is not dict or word_freq == {}:
        return []
    return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)

if __name__ == '__main__':
    word_freqs = sort(frequencies(remove_stop_words(extract_words(testfilepath))))
    print_word_freqs(word_freqs)

@ -0,0 +1,39 @@
from collections import Counter
from cppy.cp_util import *

def extract_words(path_to_file):
    assert type(path_to_file) is str, "I need a string!"
    assert path_to_file, "I need a non-empty string!"
    try:
        with open(path_to_file, encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
        print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror))
        raise e
    return re_split(str_data)

def remove_stop_words(word_list):
    assert type(word_list) is list, "I need a list!"
    try:
        stop_words = get_stopwords()
    except IOError as e:
        print("I/O error({0}) when opening stop_words.txt: {1}! I quit!".format(e.errno, e.strerror))
        raise e
    return [w for w in word_list if w not in stop_words]

def frequencies(word_list):
    return Counter(word_list)

def sort(word_freq):
    return word_freq.most_common()

if __name__ == '__main__':
    word_freqs = sort(frequencies(remove_stop_words(extract_words(testfilepath))))
    print_word_freqs(word_freqs)

@ -0,0 +1,25 @@
from cppy.cp_util import *

def extractWords(path_to_file):
    assert type(path_to_file) is str, "I need a string! I quit!"
    assert path_to_file, "I need a non-empty string! I quit!"
    return extract_file_words(path_to_file)

def frequencies(word_list):
    assert type(word_list) is list, "I need a list! I quit!"
    assert word_list != [], "I need a non-empty list! I quit!"
    return get_frequencies(word_list)

def sort(word_freqs):
    assert type(word_freqs) is dict, "I need a dictionary! I quit!"
    assert word_freqs != {}, "I need a non-empty dictionary! I quit!"
    return sort_dict(word_freqs)

if __name__ == '__main__':
    try:
        word_freqs = sort(frequencies(extractWords(testfilepath)))
        print_word_freqs(word_freqs)
    except Exception as e:
        print("Something went wrong: {0}".format(e))

@ -0,0 +1,30 @@
from collections import Counter
from cppy.cp_util import *

class AcceptTypes:
    """Parameterized decorator that type-checks positional arguments."""
    def __init__(self, *args):
        self._args = args

    def __call__(self, f):
        def wrapped_f(*args, **kwargs):
            for i, arg_type in enumerate(self._args):
                if not isinstance(args[i], arg_type):
                    raise TypeError(f"Argument {i} expected {arg_type}, got {type(args[i])}")
            return f(*args, **kwargs)
        return wrapped_f

@AcceptTypes(str)
def extract_words_(path_to_file):
    return extract_file_words(path_to_file)

@AcceptTypes(list)
def frequencies_(word_list):
    return Counter(word_list)

@AcceptTypes(Counter)
def sort_(word_freq):
    return word_freq.most_common()

if __name__ == '__main__':
    word_freqs = sort_(frequencies_(extract_words_(testfilepath)))
    print_word_freqs(word_freqs)
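For illustration, calling a decorated function with the wrong argument type now fails fast instead of producing a confusing error later (this call is not part of the file above):

extract_words_(42)   # raises TypeError: Argument 0 expected <class 'str'>, got <class 'int'>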

@ -0,0 +1,27 @@
from functools import reduce
from collections import Counter
from cppy.cp_util import *

def partition(data_str, nlines):
    lines = data_str.split('\n')
    for i in range(0, len(lines), nlines):
        yield '\n'.join(lines[i:i+nlines])

def split_words(data_str):
    word_list = extract_str_words(data_str)
    return Counter(word_list)

def count_words(pairs_list_1, pairs_list_2):
    return pairs_list_1 + pairs_list_2

if __name__ == '__main__':
    data = read_file(testfilepath)
    # map: run split_words over every partition
    splits = map(split_words, partition(data, 200))
    splits_list = list(splits)
    # reduce: merge the per-partition counts with count_words
    word_freqs = sort_dict(reduce(count_words, splits_list, Counter()))
    print_word_freqs(word_freqs)

@ -0,0 +1,37 @@
from functools import reduce
from cppy.cp_util import *
#################################################
# Functions for map reduce
#################################################
def partition(data_str, nlines):
lines = data_str.split('\n')
for i in range(0, len(lines), nlines):
yield '\n'.join(lines[i:i+nlines])
def split_words(data_str):
words = extract_str_words(data_str)
return [ (w, 1) for w in words ]
def regroup(pairs_list):
mapping = {}
for pairs in pairs_list:
for p in pairs:
mapping[p[0]] = mapping.get(p[0], []) + [p]
return mapping
def count_words(mapping):
def add(x, y): return x+y
return ( mapping[0],
reduce(add, (pair[1] for pair in mapping[1]))
)
def sort (word_freq):
return sorted(word_freq, key=operator.itemgetter(1), reverse=True)
if __name__ == '__main__':
data = read_file(testfilepath)
splits = map(split_words, partition(data, 200))
splits_per_word = regroup(splits)
word_freqs = sort(map(count_words, splits_per_word.items()))
print_word_freqs(word_freqs)
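A standalone trace of the data shapes at each map-reduce stage (the two-partition input is made-up data):

from functools import reduce
pairs_list = [[('the', 1), ('quick', 1)], [('the', 1)]]   # two split_words outputs
mapping = {}
for pairs in pairs_list:
    for p in pairs:
        mapping[p[0]] = mapping.get(p[0], []) + [p]
# mapping == {'the': [('the', 1), ('the', 1)], 'quick': [('quick', 1)]}
counts = [(w, reduce(lambda x, y: x + y, (c for _, c in ps)))
          for w, ps in mapping.items()]
print(counts)   # [('the', 2), ('quick', 1)]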

@ -0,0 +1,40 @@
import re
import multiprocessing
from collections import Counter
from cppy.cp_util import *

stopwords = get_stopwords()

def process_chunk(chunk):
    # tokenize and filter out stop words
    words = re.findall(r'\w+', chunk.lower())
    words = [word for word in words if word not in stopwords and len(word) > 2]
    return Counter(words)

def merge_counts(counts_list):
    # merge several Counter objects into one
    total_counts = Counter()
    for counts in counts_list:
        total_counts += counts
    return total_counts

if __name__ == '__main__':
    # read the file contents
    content = read_file(testfilepath)
    # split the contents into chunks, one per task
    # (note: slicing by character may split a word at a chunk boundary)
    chunk_size = 1000  # tune the chunk size as needed
    chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
    # process the chunks with a pool of worker processes
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    counts_list = pool.map(process_chunk, chunks)
    pool.close()
    pool.join()
    # merge the per-chunk counts
    total_counts = merge_counts(counts_list)
    # print the n most frequent words
    for word, count in total_counts.most_common(10):
        print(f"{word} -- {count}")

@ -0,0 +1,50 @@
import threading
import queue
from cppy.cp_util import *

# worker: consume words from the shared queue
def process_words(word_space, freq_space, stopwords):
    word_freqs = {}
    while True:
        try:
            word = word_space.get(timeout=1)
        except queue.Empty:
            break
        count_word(word, word_freqs, stopwords)
    freq_space.put(word_freqs)

# create and start the worker threads
def start_threads(word_space, freq_space, stopwords):
    workers = []
    for i in range(5):
        worker = threading.Thread(target=process_words,
                                  args=(word_space, freq_space, stopwords))
        worker.start()
        workers.append(worker)
    return workers

if __name__ == "__main__":
    stopwords = get_stopwords()
    word_space = queue.Queue()
    freq_space = queue.Queue()
    # push the words into word_space
    for word in extract_file_words(testfilepath):
        word_space.put(word)
    # create and start the threads
    workers = start_threads(word_space, freq_space, stopwords)
    # wait for all threads to finish
    for worker in workers:
        worker.join()
    # merge the partial results
    word_freqs = {}
    while not freq_space.empty():
        freqs = freq_space.get()
        for (k, v) in freqs.items():
            word_freqs[k] = word_freqs.get(k, 0) + v
    # print
    print_word_freqs(sort_dict(word_freqs))

@ -0,0 +1,37 @@
import threading, queue
from cppy.cp_util import *

class WordFrequencyCounter:
    def __init__(self, input_file):
        self.word_space = queue.Queue()
        self.freq_space = queue.Queue()
        for word in extract_file_words(input_file):
            self.word_space.put(word)

    def process_words(self):
        word_freqs = {}
        while not self.word_space.empty():
            try:
                word = self.word_space.get(timeout=1)
                word_freqs[word] = word_freqs.get(word, 0) + 1
            except queue.Empty:
                break
        self.freq_space.put(word_freqs)

    def run(self):
        workers = [threading.Thread(target=self.process_words) for _ in range(5)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        word_freqs = {}
        while not self.freq_space.empty():
            freqs = self.freq_space.get()
            for word, count in freqs.items():
                word_freqs[word] = word_freqs.get(word, 0) + count
        print_word_freqs(sort_dict(word_freqs))

if __name__ == '__main__':
    counter = WordFrequencyCounter(testfilepath)
    counter.run()

@ -0,0 +1,62 @@
import sqlite3, os.path
from cppy.cp_util import *

# Database schema
TABLES = {
    'documents': '''CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL
    )''',
    'words': '''CREATE TABLE IF NOT EXISTS words (
        id INTEGER PRIMARY KEY AUTOINCREMENT, -- referenced by characters.word_id
        doc_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (doc_id) REFERENCES documents (id)
    )''',
    'characters': '''CREATE TABLE IF NOT EXISTS characters (
        word_id INTEGER NOT NULL,
        value TEXT NOT NULL,
        FOREIGN KEY (word_id) REFERENCES words (id)
    )'''
}

# Create the tables
def create_db_schema(connection):
    for table, sql in TABLES.items():
        c = connection.cursor()
        c.execute(sql)
        connection.commit()
        c.close()

def load_file_into_database(path_to_file, connection):
    words = extract_file_words(path_to_file)
    c = connection.cursor()
    c.execute("INSERT INTO documents (name) VALUES (?)", (path_to_file,))
    doc_id = c.lastrowid
    for w in words:
        c.execute("INSERT INTO words (doc_id, value) VALUES (?, ?)", (doc_id, w))
        word_id = c.lastrowid
        for char in w:
            c.execute("INSERT INTO characters (word_id, value) VALUES (?, ?)", (word_id, char))
    connection.commit()
    c.close()

# Create the database and load the data
db_path = 'tfdb'
if not os.path.isfile(db_path):
    with sqlite3.connect(db_path) as connection:
        create_db_schema(connection)
        load_file_into_database(testfilepath, connection)

# Query and print
with sqlite3.connect(db_path) as connection:
    c = connection.cursor()
    c.execute("SELECT value, COUNT(*) AS c FROM words GROUP BY value ORDER BY c DESC LIMIT 10")
    for row in c.fetchall():
        print(row[0], '-', row[1])

Binary file not shown.

@ -0,0 +1,109 @@
import os
from cppy.cp_util import *

def touchopen(filename, *args, **kwargs):
    # recreate the file from scratch, then open it
    try:
        os.remove(filename)
    except OSError:
        pass
    open(filename, "a", encoding='utf-8').close()  # "touch" the file
    return open(filename, *args, **kwargs)

# The constrained memory should have no more than 1024*n cells
data = []
n = 10
f = open(stopwordfilepath, encoding='utf-8')
data = [f.read(1024*n).split(',')]  # data[0] holds the stop words
f.close()
data.append([])     # data[1] is the current line (max 80 characters)
data.append(None)   # data[2] is the index of the word's start_char
data.append(0)      # data[3] is the character index, i = 0
data.append(False)  # data[4] is a flag: was the word found in the file?
data.append('')     # data[5] is the word
data.append('')     # data[6] is word,NNNN
data.append(0)      # data[7] is the frequency

# Open the secondary memory
word_freqs = touchopen('word_freqs', 'rb+')
# Open the input file
f = open(testfilepath, 'r', encoding='utf-8')
# Loop over the input file's lines
while True:
    print('.', end='', flush=True)
    data[1] = [f.readline()]
    if data[1] == ['']:  # end of input file
        break
    if data[1][0][len(data[1][0])-1] != '\n':  # if the line does not end with \n
        data[1][0] = data[1][0] + '\n'         # add \n
    data[2] = None
    data[3] = 0
    # Loop over the characters in the line
    for c in data[1][0]:  # elimination of symbol c is an exercise
        if data[2] is None:
            if c.isalnum():
                # We found the start of a word
                data[2] = data[3]
        else:
            if not c.isalnum():
                # We found the end of a word. Process it
                data[4] = False
                data[5] = data[1][0][data[2]:data[3]].lower()
                # Ignore words with len < 2, and stop words
                if len(data[5]) >= 2 and data[5] not in data[0]:
                    # Let's see if it already exists
                    while True:
                        data[6] = str(word_freqs.readline().strip(), 'utf-8')
                        if data[6] == '':
                            break
                        data[7] = int(data[6].split(',')[1])
                        # the word, without white space
                        data[6] = data[6].split(',')[0].strip()
                        if data[5] == data[6]:
                            data[7] += 1
                            data[4] = True
                            break
                    if not data[4]:
                        word_freqs.seek(0, 1)  # needed on Windows
                        word_freqs.write(bytes("%20s,%04d\n" % (data[5], 1), 'utf-8'))
                    else:
                        word_freqs.seek(-26, 1)
                        word_freqs.write(bytes("%20s,%04d\n" % (data[5], data[7]), 'utf-8'))
                    word_freqs.seek(0, 0)
                # Let's reset
                data[2] = None
        data[3] += 1
# We're done with the input file
f.close()
word_freqs.flush()

# PART 2
# Now we need to find the 25 most frequently occurring words.
# We don't need anything from the previous values in memory
del data[:]
# Let's use the first 25 entries for the top 25 words
data = data + [[]]*(25 - len(data))
data.append('')  # data[25] is word,freq read from the file
data.append(0)   # data[26] is the freq
# Loop over the secondary memory file
while True:
    data[25] = str(word_freqs.readline().strip(), 'utf-8')
    if data[25] == '':  # EOF
        break
    data[26] = int(data[25].split(',')[1])     # read the frequency as an integer
    data[25] = data[25].split(',')[0].strip()  # the word
    # Check if this word has more counts than the ones in memory
    for i in range(25):  # elimination of symbol i is an exercise
        if data[i] == [] or data[i][1] < data[26]:
            data.insert(i, [data[25], data[26]])
            del data[26]  # delete the last element
            break
for tf in data[0:10]:
    if len(tf) == 2:
        print(tf[0], '-', tf[1])
word_freqs.close()

File diff suppressed because it is too large

@ -0,0 +1,85 @@
from collections import defaultdict
from cppy.cp_util import *

#
# Framework
#
class WordFrequencyFramework:
    def __init__(self):
        self._load_event_handlers = []
        self._dowork_event_handlers = []
        self._end_event_handlers = []

    def register_for_load_event(self, handler):
        self._load_event_handlers.append(handler)

    def register_for_dowork_event(self, handler):
        self._dowork_event_handlers.append(handler)

    def register_for_end_event(self, handler):
        self._end_event_handlers.append(handler)

    def run(self, path_to_file):
        for h in self._load_event_handlers:
            h(path_to_file)
        for h in self._dowork_event_handlers:
            h()
        for h in self._end_event_handlers:
            h()

#
# Application components
#
class DataStorage:
    """ Models the contents of the file """
    def __init__(self, wfapp, stop_word_filter):
        # instance state (kept out of the class body so it is not shared)
        self._data = ''
        self._word_event_handlers = []
        self._stop_word_filter = stop_word_filter
        wfapp.register_for_load_event(self.__load)
        wfapp.register_for_dowork_event(self.__produce_words)

    def __load(self, path_to_file):
        self._data = re_split(read_file(path_to_file))

    def __produce_words(self):
        for w in self._data:
            if not self._stop_word_filter.is_stop_word(w):
                for h in self._word_event_handlers:
                    h(w)

    def register_for_word_event(self, handler):
        self._word_event_handlers.append(handler)

class StopWordFilter:
    """ Models the stop word filter """
    def __init__(self, wfapp):
        self._stop_words = []
        wfapp.register_for_load_event(self.__load)

    def __load(self, ignore):
        self._stop_words = get_stopwords()

    def is_stop_word(self, word):
        return word in self._stop_words

class WordFrequencyCounter:
    def __init__(self, wfapp, data_storage):
        self._word_freqs = defaultdict(int)
        data_storage.register_for_word_event(self.__increment_count)
        wfapp.register_for_end_event(self.__print_freqs)

    def __increment_count(self, word):
        self._word_freqs[word] += 1

    def __print_freqs(self):
        print_word_freqs(sort_dict(self._word_freqs))

if __name__ == '__main__':
    wfapp = WordFrequencyFramework()
    stop_word_filter = StopWordFilter(wfapp)
    data_storage = DataStorage(wfapp, stop_word_filter)
    word_freq_counter = WordFrequencyCounter(wfapp, data_storage)
    wfapp.run(testfilepath)

@ -0,0 +1,87 @@
from cppy.cp_util import *

#################################################
# The event management
#################################################
class EventManager:
    def __init__(self):
        self._subscriptions = {}

    def subscribe(self, event_type, handler):
        self._subscriptions.setdefault(event_type, []).append(handler)

    def publish(self, event):
        event_type = event[0]
        for h in self._subscriptions.get(event_type, []):
            h(event)

#################################################
# The application entities
#################################################
class DataStorage:
    """ Models the contents of the file """
    def __init__(self, event_manager):
        self._event_manager = event_manager
        self._event_manager.subscribe('load', self.load)
        self._event_manager.subscribe('start', self.produce_words)

    def load(self, event):
        self._data = extract_file_words(event[1])

    def produce_words(self, event):
        for w in self._data:
            self._event_manager.publish(('word', w))
        self._event_manager.publish(('eof', None))

class StopWordFilter:
    """ Models the stop word filter """
    def __init__(self, event_manager):
        self._stop_words = []
        self._event_manager = event_manager
        self._event_manager.subscribe('load', self.load)
        self._event_manager.subscribe('word', self.is_stop_word)

    def load(self, event):
        self._stop_words = get_stopwords()

    def is_stop_word(self, event):
        word = event[1]
        if word not in self._stop_words:
            self._event_manager.publish(('valid_word', word))

class WordFrequencyCounter:
    """ Keeps the word frequency data """
    def __init__(self, event_manager):
        self._word_freqs = {}
        self._event_manager = event_manager
        self._event_manager.subscribe('valid_word', self.increment_count)
        self._event_manager.subscribe('print', self.print_freqs)

    def increment_count(self, event):
        word = event[1]
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def print_freqs(self, event):
        print_word_freqs(sort_dict(self._word_freqs))

class WordFrequencyApplication:
    def __init__(self, event_manager):
        self._event_manager = event_manager
        self._event_manager.subscribe('run', self.run)
        self._event_manager.subscribe('eof', self.stop)

    def run(self, event):
        path_to_file = event[1]
        self._event_manager.publish(('load', path_to_file))
        self._event_manager.publish(('start', None))

    def stop(self, event):
        self._event_manager.publish(('print', None))

if __name__ == "__main__":
    em = EventManager()
    DataStorage(em), StopWordFilter(em), WordFrequencyCounter(em)
    WordFrequencyApplication(em)
    em.publish(('run', testfilepath))

@ -0,0 +1,104 @@
import re, operator
from cppy.cp_util import *

class DataStorageManager():
    """ Models the contents of the file """
    def __init__(self):
        self._data = []

    def dispatch(self, message):
        if message[0] == 'init':
            return self._init(message[1])
        elif message[0] == 'words':
            return self._words()
        else:
            raise Exception("Message not understood " + message[0])

    # An introspective version of dispatch:
    '''
    def dispatch(self, message):
        method_name = '_' + message[0]
        if hasattr(self, method_name):
            method = getattr(self, method_name)
            return method(*message[1:])
        else:
            raise ValueError(f"DataStorageManager doesn't understand message {message[0]}")
    '''

    def _init(self, path_to_file):
        self._data = re.findall(r'\w+', read_file(path_to_file).lower())

    def _words(self):
        return self._data

class StopWordManager():
    """ Models the stop word filter """
    _stop_words = []

    def dispatch(self, message):
        if message[0] == 'init':
            return self._init()
        elif message[0] == 'is_stop_word':
            return self._is_stop_word(message[1])
        else:
            raise Exception("Message not understood " + message[0])

    def _init(self):
        self._stop_words = get_stopwords()

    def _is_stop_word(self, word):
        return word in self._stop_words

class WordFrequencyManager():
    """ Keeps the word frequency data """
    _word_freqs = {}

    def dispatch(self, message):
        if message[0] == 'increment_count':
            return self._increment_count(message[1])
        elif message[0] == 'sorted':
            return self._sorted()
        else:
            raise Exception("Message not understood " + message[0])

    def _increment_count(self, word):
        if word in self._word_freqs:
            self._word_freqs[word] += 1
        else:
            self._word_freqs[word] = 1

    def _sorted(self):
        return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)

class WordFrequencyController():
    def dispatch(self, message):
        if message[0] == 'init':
            return self._init(message[1])
        elif message[0] == 'run':
            return self._run()
        else:
            raise Exception("Message not understood " + message[0])

    def _init(self, path_to_file):
        self._storage_manager = DataStorageManager()
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()
        self._storage_manager.dispatch(['init', path_to_file])
        self._stop_word_manager.dispatch(['init'])

    def _run(self):
        for w in self._storage_manager.dispatch(['words']):
            if not self._stop_word_manager.dispatch(['is_stop_word', w]):
                self._word_freq_manager.dispatch(['increment_count', w])
        word_freqs = self._word_freq_manager.dispatch(['sorted'])
        for (w, c) in word_freqs[0:10]:
            print(w, '-', c)

if __name__ == '__main__':
    wfcontroller = WordFrequencyController()
    wfcontroller.dispatch(['init', testfilepath])
    wfcontroller.dispatch(['run'])

@ -0,0 +1,120 @@
from threading import Thread
from queue import Queue
from cppy.cp_util import *

class ActiveWFObject(Thread):
    def __init__(self):
        super().__init__()
        self.queue = Queue()
        self._stopMe = False
        self.start()

    def run(self):
        while not self._stopMe:
            message = self.queue.get()
            self._dispatch(message)
            if message[0] == 'die':
                self._stopMe = True

def send(receiver, message):
    receiver.queue.put(message)

class DataStorageManager(ActiveWFObject):
    """ Models the contents of the file """
    _data = ''

    def _dispatch(self, message):
        if message[0] == 'init':
            self._init(message[1:])
        elif message[0] == 'send_word_freqs':
            self._process_words(message[1:])
        else:
            # forward
            send(self._stop_word_manager, message)

    def _init(self, message):
        path_to_file = message[0]
        self._stop_word_manager = message[1]
        self._data = extract_file_words(path_to_file)

    def _process_words(self, message):
        recipient = message[0]
        for w in self._data:
            send(self._stop_word_manager, ['filter', w])
        send(self._stop_word_manager, ['top10', recipient])

class StopWordManager(ActiveWFObject):
    """ Models the stop word filter """
    _stop_words = []

    def _dispatch(self, message):
        if message[0] == 'init':
            self._init(message[1:])
        elif message[0] == 'filter':
            return self._filter(message[1:])
        else:
            # forward
            send(self._word_freqs_manager, message)

    def _init(self, message):
        self._stop_words = get_stopwords()
        self._word_freqs_manager = message[0]

    def _filter(self, message):
        word = message[0]
        if word not in self._stop_words:
            send(self._word_freqs_manager, ['word', word])

class WordFrequencyManager(ActiveWFObject):
    """ Keeps the word frequency data """
    _word_freqs = {}

    def _dispatch(self, message):
        if message[0] == 'word':
            self._increment_count(message[1:])
        elif message[0] == 'top10':
            self._top10(message[1:])

    def _increment_count(self, message):
        word, = message
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def _top10(self, message):
        recipient = message[0]
        freqs_sorted = sort_dict(self._word_freqs)
        send(recipient, ['top10', freqs_sorted])

class WordFrequencyController(ActiveWFObject):
    def _dispatch(self, message):
        if message[0] == 'run':
            self._run(message[1:])
        elif message[0] == 'top10':
            self._display(message[1:])
        else:
            raise Exception("Message not understood " + message[0])

    def _run(self, message):
        self._storage_manager, = message
        send(self._storage_manager, ['send_word_freqs', self])

    def _display(self, message):
        word_freqs, = message
        print_word_freqs(word_freqs)
        send(self._storage_manager, ['die'])
        self._stopMe = True

if __name__ == '__main__':
    word_freq_manager = WordFrequencyManager()
    stop_word_manager = StopWordManager()
    storage_manager = DataStorageManager()
    send(stop_word_manager, ['init', word_freq_manager])
    send(storage_manager, ['init', testfilepath, stop_word_manager])
    wfcontroller = WordFrequencyController()
    send(wfcontroller, ['run', storage_manager])
    # Wait for the active objects to finish
    [t.join() for t in [word_freq_manager, stop_word_manager, storage_manager, wfcontroller]]

@ -0,0 +1,34 @@
from cppy.cp_util import *

class Calculator:
    def frequencies(self, word_list):
        return get_frequencies(word_list)

    def sort(self, word_freq):
        return sort_dict(word_freq)

    def print_all(self, word_freqs):
        print_word_freqs(word_freqs[1:])

##########################################
# Application framework
##########################################
def call_method(obj, method_name, *args):
    # use introspection (getattr) to look up method_name on obj at run time
    method = getattr(obj, method_name, None)
    if method:
        return method(*args)  # invoke the method dynamically
    else:
        return "Method not found."

if __name__ == '__main__':
    # the pipeline of methods to apply
    method_names = 'frequencies sort print_all'
    data = extract_file_words(testfilepath)
    calc = Calculator()
    for method_name in method_names.split():
        data = call_method(calc, method_name, data)

@ -0,0 +1,37 @@
import cppy.cp_util as util

##########################################
# Task handlers
##########################################
class wordsTaskHandler:
    def handle(self, path_to_file):
        return util.extract_file_words(path_to_file)

class frequenciesTaskHandler:
    def handle(self, word_list):
        return util.get_frequencies(word_list)

class sortTaskHandler:
    def handle(self, word_freq):
        return util.sort_dict(word_freq)

##########################################
# Application framework
##########################################
def handle_task(task_type, *args):
    handler_class_name = f"{task_type}TaskHandler"  # build the handler class name
    # look the class up in the module's global symbol table
    handler_class = globals().get(handler_class_name)
    if handler_class:
        handler = handler_class()     # instantiate the handler class
        return handler.handle(*args)  # call its handle method
    else:
        print(f"No handler found for task type: {task_type}")

if __name__ == '__main__':
    word_list = handle_task("words", util.testfilepath)
    word_freq = handle_task("frequencies", word_list)
    word_sort = handle_task("sort", word_freq)
    util.print_word_freqs(word_sort)

@ -0,0 +1,18 @@
import operator
import cppy.cp_util as util

# helper functions
def extract_words(path_to_file):
    return util.extract_file_words(path_to_file)

def frequencies(word_list):
    return util.get_frequencies(word_list)

# The sort function arrives as source text; s could just as well be read
# from a file or arrive over the network.
s = 'lambda word_freq: ' + \
    'sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)'
exec('mysort = ' + s)

if __name__ == '__main__':
    word_freq = frequencies(extract_words(util.testfilepath))
    word_freq = locals()['mysort'](word_freq)
    util.print_word_freqs(word_freq)

@ -0,0 +1,27 @@
from cppy.cp_util import *

# Framework class
class TFFlowcls:
    def __init__(self, v):
        self._value = v

    def bind(self, func):
        self._value = func(self._value)
        return self

    def over(self):
        print(self._value)

def top10_freqs(word_freqs):
    top10 = "\n".join(f"{word} - {count}" for word, count in word_freqs[:10])
    return top10

if __name__ == "__main__":
    TFFlowcls(testfilepath)\
        .bind(extract_file_words)\
        .bind(get_frequencies)\
        .bind(sort_dict)\
        .bind(top10_freqs)\
        .over()

@ -0,0 +1,64 @@
from cppy.cp_util import *

#
# Framework class
#
class TFFlowAll:
    def __init__(self, func):
        self._funcs = [func]

    def bind(self, func):
        self._funcs.append(func)
        return self

    def execute(self):
        def is_callable(obj):
            """Check if an object is callable."""
            return hasattr(obj, '__call__')

        def call_if_possible(obj):
            """Call the object if it's callable, otherwise return it as is."""
            return obj() if is_callable(obj) else obj

        # Initialize the value to a no-op lambda function
        value = lambda: None
        for func in self._funcs:
            value = call_if_possible(func(value))
        print(call_if_possible(value))

#
# Helper functions: each returns a thunk that does the real work lazily
#
def get_input(arg):
    def _f():
        return testfilepath
    return _f

def extractwords(path_to_file):
    def _f():
        return extract_file_words(path_to_file)
    return _f

def frequencies(word_list):
    def _f():
        return get_frequencies(word_list)
    return _f

def sort(word_freq):
    def _f():
        return sort_dict(word_freq)
    return _f

def top10_freqs(word_freqs):
    def _f():
        return '\n'.join(f"{word} - {freq}" for word, freq in word_freqs[:10])
    return _f

if __name__ == "__main__":
    TFFlowAll(get_input)\
        .bind(extractwords)\
        .bind(frequencies)\
        .bind(sort)\
        .bind(top10_freqs)\
        .execute()

@ -0,0 +1,49 @@
import threading
from collections import Counter
from cppy.cp_util import *

"""
Instead of handing slices of one text to several threads, assign several
files to several threads; once real I/O is involved, the benefit of
threading becomes visible.
"""

stop_words = get_stopwords()

# count the word frequencies for one thread's slice of the text
def count_words(start, end, text, result_index, results):
    words = re_split(text[start:end])
    words = [w for w in words if w not in stop_words]
    results[result_index] = Counter(words)

if __name__ == '__main__':
    # read the file contents
    text = read_file(testfilepath)
    # decide on the number of threads
    num_threads = 4
    text_length = len(text)
    chunk_size = text_length // num_threads
    # one result slot per thread
    results = [None] * num_threads
    threads = []
    # create and start the threads
    for i in range(num_threads):
        start = i * chunk_size
        # make sure the last thread reads to the end of the file
        end = text_length if i == num_threads - 1 else (i + 1) * chunk_size
        t = threading.Thread(target=count_words, args=(start, end, text, i, results))
        threads.append(t)
        t.start()
    # wait for all threads to finish
    for t in threads:
        t.join()
    # merge the results
    total_count = Counter()
    for result in results:
        total_count += result
    # print the 10 most frequent words
    for w, c in total_count.most_common(10):
        print(w, '--', c)
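A minimal sketch of the variant the docstring above suggests: one file per thread, so each worker does real I/O. The file list is hypothetical, and the cppy.cp_util helpers are assumed to behave as in the file above.

import threading
from collections import Counter
from cppy.cp_util import *

stop_words = get_stopwords()

def count_file(path, result_index, results):
    # each thread reads and counts one whole file
    words = [w for w in re_split(read_file(path)) if w not in stop_words]
    results[result_index] = Counter(words)

files = ['a.txt', 'b.txt', 'c.txt']   # hypothetical input files
results = [None] * len(files)
threads = [threading.Thread(target=count_file, args=(p, i, results))
           for i, p in enumerate(files)]
for t in threads: t.start()
for t in threads: t.join()
total = sum(results, Counter())
print(total.most_common(10))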

@ -0,0 +1,45 @@
from cppy.cp_util import *

###########################################
# Generators
###########################################
def characters(filename):
    # yield the file one character at a time
    for line in open(filename, encoding='utf-8'):
        for c in line:
            yield c

def all_words(filename):
    # yield the file one word at a time
    start_char = True
    for c in characters(filename):
        if start_char:
            word = ""
            if c.isalnum():
                # start of a word
                word = c.lower()
                start_char = False
        else:
            if c.isalnum():
                word += c.lower()
            else:
                # end of the word: emit it
                start_char = True
                yield word

def non_stop_words(filename, stopwords):
    # yield only the words that pass the stop-word filter
    for w in all_words(filename):
        if w not in stopwords:
            yield w

if __name__ == "__main__":
    stopwords = get_stopwords()
    freqs = {}
    for word in non_stop_words(testfilepath, stopwords):
        freqs[word] = freqs.get(word, 0) + 1
    data = sort_dict(freqs)
    print_word_freqs(data)

@ -0,0 +1,33 @@
import time
import cppy.cp_util as util

# helper functions
def extract_words(path_to_file):
    return util.extract_file_words(path_to_file)

def frequencies(word_list):
    return util.get_frequencies(word_list)

def sort(word_freq):
    return util.sort_dict(word_freq)

# closure: wrap f with timing code
def profile(f):
    def profilewrapper(*arg, **kw):
        start_time = time.time()
        ret_value = f(*arg, **kw)
        elapsed = time.time() - start_time
        print(f"{f.__name__} took {elapsed} s")
        return ret_value
    return profilewrapper

# decorate the tracked functions by rebinding their global names (introspection)
tracked_functions = [extract_words, frequencies, sort]
for func in tracked_functions:
    globals()[func.__name__] = profile(func)

if __name__ == "__main__":
    word_freqs = sort(frequencies(extract_words(util.testfilepath)))
    util.print_word_freqs(word_freqs)

@ -0,0 +1,2 @@
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
know. sure "

@ -0,0 +1,27 @@
import sys
from cppy.cp_util import *

script_dir = os.path.dirname(os.path.abspath(__file__))
testfile = os.path.join(script_dir, 'test.txt')
stop_words = get_stopwords()

# If this crashes, lower the 5000
RECURSION_LIMIT = 5000
sys.setrecursionlimit(RECURSION_LIMIT)

def count(i, chunks, stopwords, wordfreqs):
    # tally chunk i, then recurse on the previous chunk
    if i < 0:
        return
    for word in chunks[i]:
        if word not in stopwords:
            wordfreqs[word] = wordfreqs.get(word, 0) + 1
    count(i - 1, chunks, stopwords, wordfreqs)

word_list = re_split(open(testfile, encoding='utf-8').read())
filesize = len(word_list)
# pick a chunk size so that the number of chunks (= the recursion depth)
# stays within RECURSION_LIMIT
chunk_size = (filesize // RECURSION_LIMIT) + 1
n_chunks = (filesize + chunk_size - 1) // chunk_size
chunks = [word_list[x*chunk_size:(x+1)*chunk_size]
          for x in range(n_chunks)]
word_freqs = {}
count(n_chunks - 1, chunks, stop_words, word_freqs)
print_word_freqs(sort_dict(word_freqs))
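A quick standalone check of the chunking arithmetic above (hypothetical numbers):

filesize = 10_000; RECURSION_LIMIT = 5000
chunk_size = (filesize // RECURSION_LIMIT) + 1        # 3 words per chunk
n_chunks = (filesize + chunk_size - 1) // chunk_size  # 3334 chunks = recursion depth
assert n_chunks <= RECURSION_LIMIT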