From 950cb41e08d44d553ab2187a8fc6b9df6efca1c2 Mon Sep 17 00:00:00 2001 From: zj3D Date: Sun, 17 Mar 2024 12:47:25 +0800 Subject: [PATCH] debug --- 语言特性/尾调用_类方法/t26c。py | 65 ++++++++++++++++++++++ 语言特性/尾调用_类方法/tf-10.py | 4 +- 语言特性/尾调用_类方法/tf-25.py | 17 ++---- 语言特性/尾调用_类方法/tf-26.py | 35 ++++++++++++ 语言特性/尾调用_类方法/tf-26B.py | 49 ++++++++++++++++ 语言特性/异步/数据流/tf-28.py | 53 +++++------------- 6 files changed, 171 insertions(+), 52 deletions(-) create mode 100644 语言特性/尾调用_类方法/t26c。py create mode 100644 语言特性/尾调用_类方法/tf-26.py create mode 100644 语言特性/尾调用_类方法/tf-26B.py diff --git a/语言特性/尾调用_类方法/t26c。py b/语言特性/尾调用_类方法/t26c。py new file mode 100644 index 0000000..74ae750 --- /dev/null +++ b/语言特性/尾调用_类方法/t26c。py @@ -0,0 +1,65 @@ +from cppy.cp_util import * +from collections import Counter +from heapq import nlargest +import re + + +class Pipeline: + def __init__(self): + pass + + def __or__(self, other): + class PipelineComposition(Pipeline): + def __init__(self, first, second): + self.first = first + self.second = second + + def process(self, data): + return self.second.process(self.first.process(data)) + + return PipelineComposition(self, other) + + def process(self, data): + raise NotImplementedError + + +class FileReader(Pipeline): + def __init__(self, filename): + super().__init__() + self.filename = filename + + def process(self): + with open(self.filename, 'r', encoding='utf-8') as file: + content = file.read() + return content + + +class WordFrequencyCounter(Pipeline): + def process(self, text): + words = re.findall(r'\w+', text.lower()) + word_freq = Counter(words) + return word_freq + + +class TopNFilter(Pipeline): + def __init__(self, n): + super().__init__() + self.n = n + + def process(self, word_freq): + return nlargest(self.n, word_freq.items(), key=lambda item: item[1]) + + +# 假设有一个文本文件"text.txt",其内容是需要分析的文本 +filename = testfilepath +n = 5 # 求取最高5个词频 + +# 创建管道 +pipeline = FileReader(filename) | WordFrequencyCounter() | TopNFilter(n) + +# 执行管道 +top_n_words = pipeline.process() + +# 打印结果 +for word, freq in top_n_words: + print(f"{word}: {freq}") \ No newline at end of file diff --git a/语言特性/尾调用_类方法/tf-10.py b/语言特性/尾调用_类方法/tf-10.py index 62d20b8..66fc8b5 100644 --- a/语言特性/尾调用_类方法/tf-10.py +++ b/语言特性/尾调用_类方法/tf-10.py @@ -14,7 +14,7 @@ class TFFlowcls: print(self._value) -def top10_freqs(word_freqs): +def top_freqs(word_freqs): top10 = "\n".join(f"{word} - {count}" for word, count in word_freqs[:10]) return top10 @@ -23,5 +23,5 @@ if __name__ == "__main__": .bind(extract_file_words)\ .bind(get_frequencies)\ .bind(sort_dict)\ - .bind(top10_freqs)\ + .bind(top_freqs)\ .over() \ No newline at end of file diff --git a/语言特性/尾调用_类方法/tf-25.py b/语言特性/尾调用_类方法/tf-25.py index 1c1f320..11fac4a 100644 --- a/语言特性/尾调用_类方法/tf-25.py +++ b/语言特性/尾调用_类方法/tf-25.py @@ -3,7 +3,7 @@ from cppy.cp_util import * # # 框架类 # -class TFFlowAll: +class TFFlowcls: def __init__(self, func): self._funcs = [func] @@ -11,20 +11,15 @@ class TFFlowAll: self._funcs.append(func) return self - def execute(self): - def is_callable(obj): - """Check if an object is callable.""" - return hasattr(obj, '__call__') - + def execute(self): def call_if_possible(obj): """Call the object if it's callable, otherwise return it as is.""" - return obj() if is_callable(obj) else obj + return obj() if hasattr(obj, '__call__') else obj # Initialize the value to a no-op lambda function value = lambda: None for func in self._funcs: - value = call_if_possible(func(value)) - print(call_if_possible(value)) + value = call_if_possible(func(value)) # # 工具函数 @@ -51,12 +46,12 @@ def sort(word_freq): def top10_freqs(word_freqs): def _f(): - return '\n'.join(f"{word} - {freq}" for word, freq in word_freqs[:10]) + return print_word_freqs( word_freqs ) return _f if __name__ == "__main__": - TFFlowAll(get_input)\ + TFFlowcls(get_input)\ .bind(extractwords)\ .bind(frequencies)\ .bind(sort)\ diff --git a/语言特性/尾调用_类方法/tf-26.py b/语言特性/尾调用_类方法/tf-26.py new file mode 100644 index 0000000..2e67d5f --- /dev/null +++ b/语言特性/尾调用_类方法/tf-26.py @@ -0,0 +1,35 @@ +from cppy.cp_util import * +from collections import Counter + +class Pipe: + def __init__(self, func, *args, kwargs=None): + self.func = func + # self.args, self.kwargs= None ,None + if args : self.args = args + if kwargs: self.kwargs = kwargs + # print( self.args, self.kwargs) + + def __or__(self, other): + return other(self._value) + + def __call__(self, data): + self._value = self.func(data, *self.args, self.kwargs) + +def read_file(filename): + with open(filename, 'r') as f: + return f.read() + +def split_words(text): + return re.findall(r'\b\w+\b', text.lower()) + +def count_words(words): + return Counter(words) + +def top_n_words(word_counts, n): + return word_counts.most_common(n) + + +# 使用管道 +pipe = Pipe(extract_file_words) | Pipe(get_frequencies) | Pipe(sort_dict) | Pipe(print_word_freqs, 5) +result = pipe(testfilepath) +print(result) diff --git a/语言特性/尾调用_类方法/tf-26B.py b/语言特性/尾调用_类方法/tf-26B.py new file mode 100644 index 0000000..20c7b3b --- /dev/null +++ b/语言特性/尾调用_类方法/tf-26B.py @@ -0,0 +1,49 @@ +import re +from collections import Counter +from functools import reduce +from cppy.cp_util import * + +class Pipeline: + def __init__(self, function): + self.function = function + + def __or__(self, other): + if isinstance(other, Pipeline): + return Pipeline(lambda x: self.function(x) or other.function(x)) + else: + raise TypeError("The argument must be an instance of Pipeline") + + def process(self, data): + return self.function(data) + +# 定义处理函数 +def read_file(path): + with open(path, 'r', encoding='utf-8') as file: + return file.read() + +def clean_text(text): + return re.sub(r'[^\w\s]', '', text).lower() + +def tokenize(text): + return re.findall(r'\b\w+\b', text) + +def remove_stop_words(tokens, stop_words): + return [token for token in tokens if token not in stop_words] + +def count_frequencies(tokens): + return Counter(tokens) + +def get_top_n_frequencies(counter, n): + return counter.most_common(n) + +# 定义停用词列表 +stop_words = set(['the', 'and', 'a', 'to', 'of', 'in', 'for', 'on', 'is', 'it', 'with', 'that', 'as', 'by', 'this', 'at', 'be', 'which', 'from', 'or', 'are', 'an', 'but', 'not', 'you', 'have', 'your', 'can', 'will', 'all', 'any', 'if', 'their', 'would', 'what', 'there', 'when', 'which', 'who', 'whom', 'whose', 'where', 'why']) + +# 创建管道 +pipeline = (Pipeline(read_file) | clean_text | tokenize + | remove_stop_words | count_frequencies + | get_top_n_frequencies(n=10)) + +# 执行管道并打印结果 +top_n_word_frequencies = pipeline.process( testfilepath ) +print(top_n_word_frequencies) \ No newline at end of file diff --git a/语言特性/异步/数据流/tf-28.py b/语言特性/异步/数据流/tf-28.py index 81ca717..0039fd6 100644 --- a/语言特性/异步/数据流/tf-28.py +++ b/语言特性/异步/数据流/tf-28.py @@ -1,45 +1,20 @@ from cppy.cp_util import * -########################################### +# # 生成器 -########################################### -def characters(filename): # 弹出一行 - for line in open(filename,encoding='utf-8'): - for c in line: - yield c - - -def all_words(filename): # 弹出一个词 - start_char = True - for c in characters(filename): - if start_char == True: - word = "" - if c.isalnum(): # start of a word - word = c.lower() - start_char = False - else: - pass - else: - if c.isalnum(): - word += c.lower() # end of word, emit it - else: - start_char = True - yield word - - -def non_stop_words(filename, stopwords): - for w in all_words(filename): - if not w in stopwords: - yield w # 弹出一个审核过的词 - - -if __name__ == "__main__": +# +def non_stop_words(testfilepath): stopwords = get_stopwords() + data_str = read_file(testfilepath) + wordlist = re_split( data_str ) + for word in wordlist: + if word not in stopwords: + yield word # 弹出一个非停用词 + - freqs = {} - for word in non_stop_words(testfilepath,stopwords): - freqs[word] = freqs.get(word, 0) + 1 +freqs = {} +for word in non_stop_words(testfilepath): + freqs[word] = freqs.get(word, 0) + 1 - data = sort_dict(freqs) - print_word_freqs(data) - \ No newline at end of file +data = sort_dict(freqs) +print_word_freqs(data) \ No newline at end of file