# CodePattern/语言特性/尾调用_类方法/t26c.py

from cppy.cp_util import *
from collections import Counter  
from heapq import nlargest  
import re  
  
  
class Pipeline:
    """Base class for a processing stage that can be chained with ``|``.

    Subclasses override :meth:`process`. ``a | b`` builds a new stage whose
    ``process`` feeds the result of ``a.process(...)`` into ``b.process(...)``.
    """

    # Marks "no input was supplied", so that ``None`` stays a legal input value.
    _MISSING = object()

    def __init__(self):
        pass

    def __or__(self, other):
        """Return a composite stage that runs ``self`` first, then ``other``.

        ``other`` is not type-checked; it only needs a ``process`` method.
        """
        class PipelineComposition(Pipeline):
            def __init__(self, first, second):
                super().__init__()
                self.first = first
                self.second = second

            def process(self, data=Pipeline._MISSING):
                # A chain that starts with a source stage (one whose
                # ``process`` takes no input) is run as ``chain.process()``.
                # In that case the first stage must be called without an
                # argument too, instead of being handed a placeholder.
                if data is Pipeline._MISSING:
                    intermediate = self.first.process()
                else:
                    intermediate = self.first.process(data)
                return self.second.process(intermediate)

        return PipelineComposition(self, other)

    def process(self, data):
        """Transform ``data`` and return the result.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
  
  
class FileReader(Pipeline):
    """Source stage that reads a UTF-8 text file and returns its contents."""

    def __init__(self, filename):
        super().__init__()
        self.filename = filename

    def process(self, data=None):
        """Return the whole content of ``self.filename`` as one string.

        ``data`` is ignored. It is accepted only so this method can be called
        the same way as ``Pipeline.process(data)``; calling ``process()``
        with no argument still works.

        Raises:
            OSError: if the file cannot be opened or read.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        with open(self.filename, encoding='utf-8') as file:
            return file.read()
  
  
class WordFrequencyCounter(Pipeline):
    """Stage that turns a text into a ``Counter`` of its lower-cased words."""

    def process(self, text):
        """Lower-case ``text``, split it into runs of word characters and
        return a ``Counter`` mapping each run to its number of occurrences.
        """
        lowered = text.lower()
        tally = Counter()
        tally.update(re.findall(r'\w+', lowered))
        return tally
  
  
class TopNFilter(Pipeline):
    """Stage that keeps the ``n`` highest-valued entries of a mapping."""

    def __init__(self, n):
        super().__init__()
        self.n = n

    def process(self, word_freq):
        """Return at most ``self.n`` ``(key, value)`` pairs of ``word_freq``,
        ordered from the largest value to the smallest.
        """
        def value_of(pair):
            return pair[1]

        pairs = word_freq.items()
        return nlargest(self.n, pairs, key=value_of)
  
  
# Path of the text file to analyse. `testfilepath` is not defined in this
# file; presumably it is provided by the star import from cppy.cp_util
# (TODO confirm).
filename = testfilepath
n = 5  # number of most frequent words to report

# Build the pipeline: read the file -> count words -> keep the top n
pipeline = FileReader(filename) | WordFrequencyCounter() | TopNFilter(n)

# Run the pipeline
top_n_words = pipeline.process()

# Print each (word, frequency) pair on its own line
for word, freq in top_n_words:
    print(f"{word}: {freq}")
debug 8 months ago			`from cppy.cp_util import *`
			`from collections import Counter`
			`from heapq import nlargest`
			`import re`


			`class Pipeline:`
			`def __init__(self):`
			`pass`

			`def __or__(self, other):`
			`class PipelineComposition(Pipeline):`
			`def __init__(self, first, second):`
			`self.first = first`
			`self.second = second`

			`def process(self, data):`
			`return self.second.process(self.first.process(data))`

			`return PipelineComposition(self, other)`

			`def process(self, data):`
			`raise NotImplementedError`


			`class FileReader(Pipeline):`
			`def __init__(self, filename):`
			`super().__init__()`
			`self.filename = filename`

			`def process(self):`
			`with open(self.filename, 'r', encoding='utf-8') as file:`
			`content = file.read()`
			`return content`


			`class WordFrequencyCounter(Pipeline):`
			`def process(self, text):`
			`words = re.findall(r'\w+', text.lower())`
			`word_freq = Counter(words)`
			`return word_freq`


			`class TopNFilter(Pipeline):`
			`def __init__(self, n):`
			`super().__init__()`
			`self.n = n`

			`def process(self, word_freq):`
			`return nlargest(self.n, word_freq.items(), key=lambda item: item[1])`


			`# 假设有一个文本文件"text.txt"，其内容是需要分析的文本`
			`filename = testfilepath`
			`n = 5 # 求取最高5个词频`

			`# 创建管道`
			`pipeline = FileReader(filename) \| WordFrequencyCounter() \| TopNFilter(n)`

			`# 执行管道`
			`top_n_words = pipeline.process()`

			`# 打印结果`
			`for word, freq in top_n_words:`
			`print(f"{word}: {freq}")`