debug

2 years ago · 950cb41e08
parent 2518a5cd85
commit 950cb41e08
6 changed files with 171 additions and 52 deletions
--- a/语言特性/尾调用_类方法/t26c。py
+++ b/语言特性/尾调用_类方法/t26c。py
@ -0,0 +1,65 @@
+from cppy.cp_util import *
+from collections import Counter  
+from heapq import nlargest  
+import re  
+  
+  
+class Pipeline:
+    """Base class for a processing stage that can be chained with ``|``.
+
+    Subclasses override :meth:`process`. ``a | b`` builds a composite stage
+    that feeds the output of ``a`` into ``b``.
+    """
+
+    # Sentinel meaning "process() was called without input", so that ``None``
+    # remains usable as a real data value.
+    _NO_INPUT = object()
+
+    def __init__(self):
+        pass
+
+    def __or__(self, other):
+        """Return a composite stage that runs ``self`` first, then ``other``."""
+        return PipelineComposition(self, other)
+
+    def process(self, data):
+        """Transform ``data``; concrete stages must override this method."""
+        raise NotImplementedError
+
+
+class PipelineComposition(Pipeline):
+    """Two stages run back to back; created by ``Pipeline.__or__``."""
+
+    def __init__(self, first, second):
+        super().__init__()
+        self.first = first
+        self.second = second
+
+    def process(self, data=Pipeline._NO_INPUT):
+        """Run ``first`` then ``second`` and return the final result.
+
+        ``data`` is optional: a chain headed by a source stage (one whose
+        ``process()`` takes no input) is started with a bare ``process()``
+        call, and that "no input" call is forwarded unchanged to ``first``.
+        """
+        if data is Pipeline._NO_INPUT:
+            intermediate = self.first.process()
+        else:
+            intermediate = self.first.process(data)
+        return self.second.process(intermediate)
+  
+  
+class FileReader(Pipeline):
+    """Source stage that reads the whole content of a UTF-8 text file."""
+
+    def __init__(self, filename):
+        super().__init__()
+        self.filename = filename
+
+    def process(self, data=None):
+        """Return the full text of ``self.filename``.
+
+        ``data`` is ignored; it is accepted only so the method can be called
+        both as ``process()`` and with the base-class ``process(data)``
+        signature.
+        """
+        with open(self.filename, 'r', encoding='utf-8') as file:
+            return file.read()
+  
+  
+class WordFrequencyCounter(Pipeline):
+    """Stage that turns text into a Counter of its lower-cased words."""
+
+    def process(self, text):
+        """Lower-case ``text``, extract runs of word characters, count them."""
+        lowered = text.lower()
+        return Counter(re.findall(r'\w+', lowered))
+  
+  
+class TopNFilter(Pipeline):
+    """Stage that keeps the ``n`` entries with the highest counts."""
+
+    def __init__(self, n):
+        super().__init__()
+        self.n = n
+
+    def process(self, word_freq):
+        """Return up to ``self.n`` ``(word, count)`` pairs, largest count first."""
+        def count_of(pair):
+            # Rank by the count, the second element of each (word, count) pair.
+            return pair[1]
+
+        pairs = word_freq.items()
+        return nlargest(self.n, pairs, key=count_of)
+  
+  
+# Text file to analyse. `testfilepath` is not defined in this file;
+# presumably it comes from the star import of cppy.cp_util (verify).
+filename = testfilepath
+n = 5  # keep the 5 most frequent words  
+  
+# Build the pipeline: read file -> count words -> keep the top n  
+pipeline = FileReader(filename) | WordFrequencyCounter() | TopNFilter(n)  
+  
+# Run the pipeline; no input is passed because the first stage reads the file itself  
+top_n_words = pipeline.process()  
+  
+# Print the result, one "word: count" line per entry  
+for word, freq in top_n_words:  
+    print(f"{word}: {freq}")
--- a/语言特性/尾调用_类方法/tf-10.py
+++ b/语言特性/尾调用_类方法/tf-10.py
@ -14,7 +14,7 @@ class TFFlowcls:
        print(self._value)


-def top10_freqs(word_freqs):
+def top_freqs(word_freqs):
+    # Format the first 10 entries of word_freqs as "word - count" lines
+    # joined by newlines. word_freqs must be sliceable and contain
+    # (word, count) pairs. The limit of 10 is still hard-coded here even
+    # though the function name no longer mentions it.
    top10 = "\n".join(f"{word} - {count}" for word, count in word_freqs[:10])
    return top10

@ -23,5 +23,5 @@ if __name__ == "__main__":
    .bind(extract_file_words)\
    .bind(get_frequencies)\
    .bind(sort_dict)\
-    .bind(top10_freqs)\
+    .bind(top_freqs)\
    .over()
--- a/语言特性/尾调用_类方法/tf-25.py
+++ b/语言特性/尾调用_类方法/tf-25.py
@ -3,7 +3,7 @@ from cppy.cp_util import *
 #
 # 框架类
 #
-class TFFlowAll:
+class TFFlowcls:
    def __init__(self, func):
        self._funcs = [func]

@ -12,19 +12,14 @@ class TFFlowAll:
        return self
    
    def execute(self):     
+        # Run every function in self._funcs in order. Each function receives
+        # the current value; when its result is callable, that result is
+        # invoked with no arguments and the outcome becomes the next value.
+        # After this commit the final value is neither printed nor returned.
-        def is_callable(obj):  
-            """Check if an object is callable."""  
-            return hasattr(obj, '__call__')  
-    
        def call_if_possible(obj):  
            """Call the object if it's callable, otherwise return it as is."""  
-            return obj() if is_callable(obj) else obj  
+            return obj() if hasattr(obj, '__call__')  else obj  
    
        # Initialize the value to a no-op lambda function  
        value = lambda: None              
        for func in self._funcs:              
            value = call_if_possible(func(value))                      
-        print(call_if_possible(value))

 #
 # 工具函数
@ -51,12 +46,12 @@ def sort(word_freq):

 def top10_freqs(word_freqs):
+    # Return a zero-argument closure that defers the work until it is called.
+    # NOTE(review): after this change the closure delegates to
+    # print_word_freqs (not defined in this view, presumably from
+    # cppy.cp_util) and no longer slices to 10 entries itself; confirm that
+    # helper applies the intended limit.
    def _f():
-        return '\n'.join(f"{word} - {freq}" for word, freq in word_freqs[:10])
+        return print_word_freqs( word_freqs )
    return _f


 if __name__ == "__main__":
-    TFFlowAll(get_input)\
+    TFFlowcls(get_input)\
    .bind(extractwords)\
    .bind(frequencies)\
    .bind(sort)\
--- a/语言特性/尾调用_类方法/tf-26.py
+++ b/语言特性/尾调用_类方法/tf-26.py
@ -0,0 +1,35 @@
+from cppy.cp_util import *
+from collections import Counter
+
+class Pipe:
+    """A single-input callable stage that can be chained with ``|``.
+
+    ``Pipe(func, *args, kwargs={...})`` wraps ``func`` so that calling the
+    pipe with ``data`` evaluates ``func(data, *args, **kwargs)``.
+    ``a | b`` returns a new ``Pipe`` that feeds the result of ``a`` into ``b``.
+    """
+
+    def __init__(self, func, *args, kwargs=None):
+        self.func = func
+        # Always define both attributes so __call__ never hits a missing one.
+        self.args = args
+        self.kwargs = dict(kwargs) if kwargs else {}
+
+    def __or__(self, other):
+        """Compose lazily: nothing runs until the resulting pipe is called."""
+        if not callable(other):
+            return NotImplemented
+        return Pipe(lambda data: other(self(data)))
+
+    def __call__(self, data):
+        """Run the wrapped function on ``data`` and return its result.
+
+        The result is also kept in ``self._value``.
+        """
+        self._value = self.func(data, *self.args, **self.kwargs)
+        return self._value
+
+def read_file(filename):
+    """Return the whole content of the text file ``filename``.
+
+    The file is decoded as UTF-8 explicitly so the result does not depend on
+    the platform's default encoding.
+    """
+    with open(filename, 'r', encoding='utf-8') as f:
+        return f.read()
+
+def split_words(text):
+    """Return the lower-cased words of ``text`` in order of appearance."""
+    # This file has no `import re` of its own; import locally instead of
+    # relying on a star import to provide the name.
+    import re
+
+    return re.findall(r'\b\w+\b', text.lower())
+
+def count_words(words):
+    """Return a Counter mapping each word to its number of occurrences."""
+    tally = Counter()
+    tally.update(words)
+    return tally
+
+def top_n_words(word_counts, n):
+    """Return the ``n`` most common ``(word, count)`` pairs of ``word_counts``."""
+    ranked = word_counts.most_common(n)
+    return ranked
+
+
+# Chain the stages with `|`, then run the result on the test file.
+# The stage functions and `testfilepath` are not defined in this file;
+# presumably they come from the star import of cppy.cp_util (verify).
+pipe = Pipe(extract_file_words) | Pipe(get_frequencies) | Pipe(sort_dict) | Pipe(print_word_freqs, 5)
+result = pipe(testfilepath)
+print(result)
--- a/语言特性/尾调用_类方法/tf-26B.py
+++ b/语言特性/尾调用_类方法/tf-26B.py
@ -0,0 +1,49 @@
+import re
+from collections import Counter
+from functools import reduce
+from cppy.cp_util import *
+
+class Pipeline:
+    """Wrap a one-argument function so stages can be chained with ``|``.
+
+    ``Pipeline(f) | g`` yields a new ``Pipeline`` computing ``g(f(x))``. The
+    right-hand side may be another ``Pipeline`` or any plain callable.
+    """
+
+    def __init__(self, function):
+        self.function = function
+
+    def __or__(self, other):
+        """Return the composition "self, then other".
+
+        Raises:
+            TypeError: if ``other`` is neither a ``Pipeline`` nor callable.
+        """
+        if isinstance(other, Pipeline):
+            next_step = other.function
+        elif callable(other):
+            next_step = other
+        else:
+            raise TypeError("The argument must be a Pipeline or a callable")
+        first_step = self.function
+        # Feed the first stage's output into the next one. The previous
+        # `a(x) or b(x)` never chained the stages and skipped the second one
+        # whenever the first returned a truthy value.
+        return Pipeline(lambda x: next_step(first_step(x)))
+
+    def process(self, data):
+        """Run the wrapped function on ``data`` and return its result."""
+        return self.function(data)
+
+# 定义处理函数
+def read_file(path):
+    """Return the entire content of the UTF-8 text file at ``path``."""
+    with open(path, 'r', encoding='utf-8') as handle:
+        content = handle.read()
+    return content
+
+def clean_text(text):
+    """Drop characters that are neither word characters nor whitespace, then lower-case."""
+    stripped = re.sub(r'[^\w\s]', '', text)
+    return stripped.lower()
+
+def tokenize(text):
+    """Return every run of word characters in ``text``, in order."""
+    word_pattern = r'\b\w+\b'
+    return re.findall(word_pattern, text)
+
+def remove_stop_words(tokens, stop_words):
+    """Return a list of the tokens that are not in ``stop_words``, order kept."""
+    kept = []
+    for token in tokens:
+        if token in stop_words:
+            continue
+        kept.append(token)
+    return kept
+
+def count_frequencies(tokens):
+    """Return a Counter mapping each token to its number of occurrences."""
+    tally = Counter()
+    tally.update(tokens)
+    return tally
+
+def get_top_n_frequencies(counter, n):
+    """Return the ``n`` most common ``(token, count)`` pairs of ``counter``."""
+    ranked = counter.most_common(n)
+    return ranked
+
+# Stop words removed before counting
+stop_words = {'the', 'and', 'a', 'to', 'of', 'in', 'for', 'on', 'is', 'it', 'with', 'that', 'as', 'by', 'this', 'at', 'be', 'which', 'from', 'or', 'are', 'an', 'but', 'not', 'you', 'have', 'your', 'can', 'will', 'all', 'any', 'if', 'their', 'would', 'what', 'there', 'when', 'who', 'whom', 'whose', 'where', 'why'}
+
+# Build the pipeline. Every stage must take exactly one argument, so the
+# extra parameters of remove_stop_words and get_top_n_frequencies are bound
+# in small lambdas instead of being called (or omitted) at build time.
+pipeline = (Pipeline(read_file)
+            | Pipeline(clean_text)
+            | Pipeline(tokenize)
+            | Pipeline(lambda tokens: remove_stop_words(tokens, stop_words))
+            | Pipeline(count_frequencies)
+            | Pipeline(lambda counter: get_top_n_frequencies(counter, n=10)))
+
+# Run the pipeline and print the result
+top_n_word_frequencies = pipeline.process( testfilepath )
+print(top_n_word_frequencies)
--- a/语言特性/异步/数据流/tf-28.py
+++ b/语言特性/异步/数据流/tf-28.py
@ -1,45 +1,20 @@
 from cppy.cp_util import *

-###########################################
+#
 # 生成器
-###########################################
-def characters(filename):  # 弹出一行
-    for line in open(filename,encoding='utf-8'):
-        for c in line:
-            yield c
-
-
-def all_words(filename):  # 弹出一个词
-    start_char = True
-    for c in characters(filename):
-        if start_char == True:
-            word = ""
-            if c.isalnum(): # start of a word                
-                word = c.lower()
-                start_char = False
-            else:
-                pass
-        else:
-            if c.isalnum():
-                word += c.lower() # end of word, emit it
-            else:                
-                start_char = True
-                yield word
-
-
-def non_stop_words(filename, stopwords):
-    for w in all_words(filename):
-        if not w in stopwords:
-            yield w    # 弹出一个审核过的词
-
-
-if __name__ == "__main__":
+#
+def non_stop_words(testfilepath): 
+    # Generator: yields, one at a time, each word of the file that is not in
+    # the stop-word collection. get_stopwords, read_file and re_split are not
+    # defined in this view; presumably they come from the star import of
+    # cppy.cp_util (verify). The whole file is read and split before the
+    # first word is yielded.
    stopwords = get_stopwords()
+    data_str = read_file(testfilepath)
+    wordlist = re_split( data_str )
+    for word in wordlist:
+        if word not in stopwords: 
+            yield word   # emit one non-stop word
+

+# Count how many times each non-stop word is produced by the generator.
 freqs = {}
-    for word in non_stop_words(testfilepath,stopwords):
+for word in non_stop_words(testfilepath):
    freqs[word] = freqs.get(word, 0) + 1        

+# sort_dict and print_word_freqs are not defined in this view; presumably
+# they come from the star import of cppy.cp_util (verify).
 data  = sort_dict(freqs)
 print_word_freqs(data)
-