debug

2 years ago · 950cb41e08
parent 2518a5cd85
commit 950cb41e08
6 changed files with 171 additions and 52 deletions
--- a/语言特性/尾调用_类方法/t26c。py
+++ b/语言特性/尾调用_类方法/t26c。py
@ -0,0 +1,65 @@
 from cppy.cp_util import *
 from collections import Counter  
 from heapq import nlargest  
 import re  
 class Pipeline:
    """Base class for a processing stage that can be chained with ``|``.

    Subclasses override :meth:`process`. ``a | b`` builds a new stage whose
    ``process`` runs ``a`` first and hands its result to ``b``.
    """

    def __init__(self):
        pass

    def __or__(self, other):
        """Return a stage that runs ``self`` and then ``other``.

        ``other`` only needs to expose a ``process`` method.
        """

        class PipelineComposition(Pipeline):
            def __init__(self, first, second):
                super().__init__()
                self.first = first
                self.second = second

            def process(self, *args, **kwargs):
                # Forward whatever the caller supplied to the first stage, so
                # a source stage that takes no input can head the chain
                # (``pipeline.process()``) while ``process(data)`` keeps
                # working for chains that start from explicit data.
                upstream = self.first.process(*args, **kwargs)
                return self.second.process(upstream)

        return PipelineComposition(self, other)

    def process(self, data):
        """Transform ``data``; concrete stages must override this."""
        raise NotImplementedError
 class FileReader(Pipeline):
    """Source stage that reads a UTF-8 text file and returns its content."""

    def __init__(self, filename):
        super().__init__()
        self.filename = filename

    def process(self, data=None):
        """Return the full text of ``self.filename``.

        ``data`` is accepted only so the signature matches
        ``Pipeline.process(data)``; a source stage has no upstream input and
        ignores it.
        """
        with open(self.filename, 'r', encoding='utf-8') as file:
            return file.read()
 class WordFrequencyCounter(Pipeline):
    """Stage that turns a text into a ``Counter`` of its lower-cased words."""

    def process(self, text):
        """Count every ``\\w+`` token of ``text`` after lower-casing it."""
        lowered = text.lower()
        tally = Counter()
        for token in re.findall(r'\w+', lowered):
            tally[token] += 1
        return tally
 class TopNFilter(Pipeline):
    """Stage that keeps the ``n`` entries with the largest counts."""

    def __init__(self, n):
        super().__init__()
        self.n = n

    def process(self, word_freq):
        """Return the ``n`` (key, count) pairs of ``word_freq`` with the highest count."""

        def count_of(pair):
            return pair[1]

        pairs = word_freq.items()
        return nlargest(self.n, pairs, key=count_of)
 # Assumes a text file (e.g. "text.txt") whose content is the text to analyse.
 filename = testfilepath
 n = 5  # report the 5 most frequent words
 # Build the pipeline: read the file -> count words -> keep the top n.
 pipeline = (
    FileReader(filename)
    | WordFrequencyCounter()
    | TopNFilter(n)
 )
 # Run the pipeline.
 top_n_words = pipeline.process()
 # Print the result, one "word: count" line per entry.
 for word, freq in top_n_words:
    print(f"{word}: {freq}")
--- a/语言特性/尾调用_类方法/tf-10.py
+++ b/语言特性/尾调用_类方法/tf-10.py
@ -14,7 +14,7 @@ class TFFlowcls:
        print(self._value)
-def top10_freqs(word_freqs):
+def top_freqs(word_freqs):
    top10 = "\n".join(f"{word} - {count}" for word, count in word_freqs[:10])
    return top10
@ -23,5 +23,5 @@ if __name__ == "__main__":
    .bind(extract_file_words)\
    .bind(get_frequencies)\
    .bind(sort_dict)\
-    .bind(top10_freqs)\
+    .bind(top_freqs)\
    .over()
--- a/语言特性/尾调用_类方法/tf-25.py
+++ b/语言特性/尾调用_类方法/tf-25.py
@ -3,7 +3,7 @@ from cppy.cp_util import *
 #
 # 框架类
 #
-class TFFlowAll:
+class TFFlowcls:
    def __init__(self, func):
        self._funcs = [func]
@ -12,19 +12,14 @@ class TFFlowAll:
        return self
    def execute(self):     
        def is_callable(obj):  
            """Check if an object is callable."""  
            return hasattr(obj, '__call__')  
        def call_if_possible(obj):  
            """Call the object if it's callable, otherwise return it as is."""  
-            return obj() if is_callable(obj) else obj  
+            return obj() if hasattr(obj, '__call__')  else obj  
        # Initialize the value to a no-op lambda function  
        value = lambda: None              
        for func in self._funcs:              
            value = call_if_possible(func(value))                      
        print(call_if_possible(value))
 #
 # 工具函数
@ -51,12 +46,12 @@ def sort(word_freq):
 def top10_freqs(word_freqs):
    def _f():
-        return '\n'.join(f"{word} - {freq}" for word, freq in word_freqs[:10])
+        return print_word_freqs( word_freqs )
    return _f
 if __name__ == "__main__":
-    TFFlowAll(get_input)\
+    TFFlowcls(get_input)\
    .bind(extractwords)\
    .bind(frequencies)\
    .bind(sort)\
--- a/语言特性/尾调用_类方法/tf-26.py
+++ b/语言特性/尾调用_类方法/tf-26.py
@ -0,0 +1,35 @@
 from cppy.cp_util import *
 from collections import Counter
 class Pipe:
    """Wrap a function as a pipeline stage that can be chained with ``|``.

    ``Pipe(f, *args, kwargs={...})`` called with ``data`` evaluates
    ``f(data, *args, **kwargs)``. ``a | b`` builds a new Pipe that feeds the
    result of ``a`` into ``b``; nothing runs until the chain is called.
    """

    def __init__(self, func, *args, kwargs=None):
        self.func = func
        # Always define both attributes so __call__ never hits a missing one.
        self.args = args
        self.kwargs = dict(kwargs) if kwargs else {}
        self._value = None  # result of the most recent call

    def __or__(self, other):
        """Return a Pipe that runs this stage and then ``other``."""
        if not callable(other):
            return NotImplemented

        def chained(data):
            return other(self(data))

        return Pipe(chained)

    def __call__(self, data):
        """Run the stage on ``data``, remember the result and return it."""
        self._value = self.func(data, *self.args, **self.kwargs)
        return self._value
 def read_file(filename):
    """Return the whole content of the text file ``filename``.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default encoding.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read()
 def split_words(text):
    """Return the lower-cased word tokens of ``text`` in order of appearance."""
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]
 def count_words(words):
    """Return a ``Counter`` holding how many times each item of ``words`` occurs."""
    tally = Counter()
    tally.update(words)
    return tally
 def top_n_words(word_counts, n):
    """Return the result of ``word_counts.most_common(n)``."""
    ranked = word_counts.most_common(n)
    return ranked
 # Use the pipeline: chain the stages with "|" and feed it the test file.
 pipe = (
    Pipe(extract_file_words)
    | Pipe(get_frequencies)
    | Pipe(sort_dict)
    | Pipe(print_word_freqs, 5)
 )
 result = pipe(testfilepath)
 print(result)
--- a/语言特性/尾调用_类方法/tf-26B.py
+++ b/语言特性/尾调用_类方法/tf-26B.py
@ -0,0 +1,49 @@
 import re
 from collections import Counter
 from functools import reduce
 from cppy.cp_util import *
 class Pipeline:
    """Wrap a one-argument function so stages can be chained with ``|``."""

    def __init__(self, function):
        self.function = function

    def __or__(self, other):
        """Return a new Pipeline that feeds this stage's output into ``other``.

        ``other`` may be another Pipeline or any plain callable.

        Raises:
            TypeError: if ``other`` is neither a Pipeline nor callable.
        """
        # Compose the stages. ``self.function(x) or other.function(x)`` would
        # skip the second stage whenever the first returns a truthy value.
        if isinstance(other, Pipeline):
            return Pipeline(lambda x: other.function(self.function(x)))
        if callable(other):
            return Pipeline(lambda x: other(self.function(x)))
        raise TypeError("The argument must be a Pipeline or a callable")

    def process(self, data):
        """Run the wrapped function on ``data`` and return its result."""
        return self.function(data)
 # 定义处理函数
 def read_file(path):
    """Return the whole content of the UTF-8 text file at ``path``."""
    with open(path, mode='r', encoding='utf-8') as handle:
        content = handle.read()
    return content
 def clean_text(text):
    """Drop characters that are neither word characters nor whitespace, then lower-case."""
    stripped = re.sub(r'[^\w\s]', '', text)
    return stripped.lower()
 def tokenize(text):
    """Return the word tokens of ``text`` in order of appearance."""
    return [match.group(0) for match in re.finditer(r'\b\w+\b', text)]
 def remove_stop_words(tokens, stop_words):
    """Return the items of ``tokens``, in order, that are not in ``stop_words``."""
    kept = []
    for token in tokens:
        if token in stop_words:
            continue
        kept.append(token)
    return kept
 def count_frequencies(tokens):
    """Return a ``Counter`` holding how many times each item of ``tokens`` occurs."""
    tally = Counter()
    tally.update(tokens)
    return tally
 def get_top_n_frequencies(counter, n):
    """Return the result of ``counter.most_common(n)``."""
    ranked = counter.most_common(n)
    return ranked
 # Stop-word list.
 stop_words = {'the', 'and', 'a', 'to', 'of', 'in', 'for', 'on', 'is', 'it', 'with', 'that', 'as', 'by', 'this', 'at', 'be', 'which', 'from', 'or', 'are', 'an', 'but', 'not', 'you', 'have', 'your', 'can', 'will', 'all', 'any', 'if', 'their', 'would', 'what', 'there', 'when', 'who', 'whom', 'whose', 'where', 'why'}
 # Build the pipeline. Every stage is wrapped in Pipeline because "|" is
 # defined between Pipeline instances, and stages that need an extra argument
 # are bound through a lambda so each stage takes exactly one input.
 pipeline = (Pipeline(read_file) | Pipeline(clean_text) | Pipeline(tokenize)
             | Pipeline(lambda tokens: remove_stop_words(tokens, stop_words))
             | Pipeline(count_frequencies)
             | Pipeline(lambda counter: get_top_n_frequencies(counter, n=10)))
 # Run the pipeline and print the result.
 top_n_word_frequencies = pipeline.process( testfilepath )
 print(top_n_word_frequencies)
--- a/语言特性/异步/数据流/tf-28.py
+++ b/语言特性/异步/数据流/tf-28.py
@ -1,45 +1,20 @@
 from cppy.cp_util import *
-###########################################
+#
 # 生成器
-###########################################
+#
-def characters(filename):  # 弹出一行
+def non_stop_words(testfilepath): 
    for line in open(filename,encoding='utf-8'):
        for c in line:
            yield c
 def all_words(filename):  # yields one word at a time
    """Yield the lower-cased alphanumeric words read from ``filename``.

    Consumes ``characters(filename)`` (assumed to yield one character at a
    time; confirm against its definition) and groups consecutive
    alphanumeric characters into words.
    """
    start_char = True
    word = ""
    for c in characters(filename):
        if start_char:
            if c.isalnum():  # start of a word
                word = c.lower()
                start_char = False
        elif c.isalnum():
            word += c.lower()
        else:
            # end of word, emit it
            start_char = True
            yield word
    # The input may end in the middle of a word (no trailing separator);
    # emit that last word instead of silently dropping it.
    if not start_char:
        yield word
 def non_stop_words(filename, stopwords):
    """Yield each word from ``all_words(filename)`` that is not in ``stopwords``."""
    for candidate in all_words(filename):
        if candidate in stopwords:
            continue
        yield candidate  # a word that passed the stop-word check
 if __name__ == "__main__":
    stopwords = get_stopwords()
    data_str = read_file(testfilepath)
    wordlist = re_split( data_str )
    for word in wordlist:
        if word not in stopwords: 
            yield word   # 弹出一个非停用词
 freqs = {}
-    for word in non_stop_words(testfilepath,stopwords):
+for word in non_stop_words(testfilepath):
    freqs[word] = freqs.get(word, 0) + 1        
 data  = sort_dict(freqs)
 print_word_freqs(data)