# CodePattern/语言特性/尾调用_类方法/t26c。py

from cppy.cp_util import *
from collections import Counter
from heapq import nlargest
import re


# Sentinel distinguishing "no argument supplied" from an explicit ``None`` input.
_NO_INPUT = object()


class Pipeline:
    """Base class for a processing stage that can be chained with ``|``.

    ``a | b`` builds a new pipeline whose ``process`` runs ``a`` first and
    passes its result to ``b``. Concrete stages override ``process``.
    """

    def __init__(self):
        pass

    def __or__(self, other):
        """Return a pipeline that runs ``self`` and then ``other``.

        ``other`` only needs to provide a ``process`` method.
        """
        return _PipelineComposition(self, other)

    def process(self, data):
        """Transform ``data``; must be overridden by concrete stages.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class _PipelineComposition(Pipeline):
    """Two stages run back to back; created by ``Pipeline.__or__``."""

    def __init__(self, first, second):
        super().__init__()
        self.first = first
        self.second = second

    def process(self, data=_NO_INPUT):
        """Run ``first`` and feed its result into ``second``.

        When called without ``data``, ``first.process()`` is invoked with no
        argument, so a pipeline may start with a source stage that produces
        its own input (and the no-argument call propagates through nested
        compositions down to the leftmost stage). When ``data`` is given,
        including an explicit ``None``, it is passed to ``first`` unchanged.
        """
        if data is _NO_INPUT:
            intermediate = self.first.process()
        else:
            intermediate = self.first.process(data)
        return self.second.process(intermediate)


class FileReader(Pipeline):
    """Source stage that yields the full text of a file."""

    def __init__(self, filename):
        """Remember the path of the file to read; nothing is opened yet."""
        super().__init__()
        self.filename = filename

    def process(self):
        """Open ``self.filename`` as UTF-8 text and return its entire content.

        Takes no input: this stage produces the pipeline's initial data.
        """
        with open(self.filename, encoding='utf-8') as handle:
            return handle.read()


class WordFrequencyCounter(Pipeline):
    """Stage that turns a text into a word -> occurrence-count mapping."""

    def process(self, text):
        """Count the words in ``text``, ignoring case.

        The text is lower-cased, then split into runs of regex word
        characters; the result is a ``Counter`` keyed by those runs.
        """
        lowered = text.lower()
        return Counter(re.findall(r'\w+', lowered))


class TopNFilter(Pipeline):
    """Stage that keeps only the ``n`` entries with the highest counts."""

    def __init__(self, n):
        """Store ``n``, the maximum number of entries to keep."""
        super().__init__()
        self.n = n

    def process(self, word_freq):
        """Return up to ``n`` ``(key, count)`` pairs from ``word_freq``.

        ``word_freq`` must provide ``items()``; pairs are ranked by their
        second element and returned as a list, largest count first.
        """
        def count_of(entry):
            return entry[1]

        candidates = word_freq.items()
        return nlargest(self.n, candidates, key=count_of)


# Text file whose content is analysed. `testfilepath` is not defined in this
# file; presumably it is provided by the star import from cppy.cp_util.
filename = testfilepath
n = 5  # report the 5 most frequent words

# Build the pipeline stage by stage: read file -> count words -> keep top n.
reader = FileReader(filename)
counter = WordFrequencyCounter()
top_filter = TopNFilter(n)
pipeline = reader | counter | top_filter

# Run the pipeline.
top_n_words = pipeline.process()

# Print one "word: count" line per result.
for entry in top_n_words:
    word, freq = entry
    print(f"{word}: {freq}")