from cppy.cp_util import *
from collections import Counter  
from heapq import nlargest  
import re  
  
  
class Pipeline:
    """Base class for a processing stage that can be chained with ``|``.

    Subclasses override :meth:`process`. ``a | b`` builds a new pipeline
    whose ``process`` runs ``a`` and hands its result to ``b``.
    """

    # Marks "process() was called without any input". A dedicated object is
    # used instead of None so that None can still be passed through as data.
    _NO_INPUT = object()

    def __init__(self):
        pass

    def __or__(self, other):
        """Return a pipeline that runs ``self`` first and then ``other``."""
        return _PipelineComposition(self, other)

    def process(self, data):
        """Transform ``data`` and return the result.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class _PipelineComposition(Pipeline):
    """Two stages run back to back; created by ``Pipeline.__or__``."""

    def __init__(self, first, second):
        super().__init__()
        self.first = first
        self.second = second

    def process(self, data=Pipeline._NO_INPUT):
        """Run ``first`` and feed its result to ``second``.

        Args:
            data: Input for the first stage. When omitted, the first stage
                is called without arguments, which allows a pipeline to
                start with a stage that produces its own input.

        Returns:
            Whatever ``second.process`` returns.
        """
        if data is Pipeline._NO_INPUT:
            # Propagate "no input" unchanged so that nested compositions
            # reach the left-most stage with a zero-argument call.
            intermediate = self.first.process()
        else:
            intermediate = self.first.process(data)
        return self.second.process(intermediate)
  
  
class FileReader(Pipeline):
    """Source stage that returns the full text of a UTF-8 encoded file."""

    def __init__(self, filename):
        """Remember the path of the file to read.

        Args:
            filename: Path handed to ``open`` when ``process`` runs.
        """
        super().__init__()
        self.filename = filename

    def process(self, data=None):
        """Read the file and return its contents as one string.

        Args:
            data: Ignored. ``Pipeline.process`` is declared with a ``data``
                parameter, so accepting it here lets a reader be invoked
                like any other stage; calling ``process()`` with no
                argument keeps working.

        Returns:
            The decoded contents of ``self.filename``.

        Raises:
            OSError: if the file cannot be opened.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        with open(self.filename, 'r', encoding='utf-8') as file:
            return file.read()
  
  
class WordFrequencyCounter(Pipeline):
    """Stage that maps a text to the number of occurrences of each word."""

    def process(self, text):
        """Count the words of ``text`` case-insensitively.

        The text is lower-cased and every maximal run of ``\\w`` characters
        counts as one word.

        Args:
            text: The string to analyse.

        Returns:
            A ``Counter`` keyed by lower-cased word.
        """
        lowered = text.lower()
        return Counter(match.group() for match in re.finditer(r'\w+', lowered))
  
  
class TopNFilter(Pipeline):
    """Stage that keeps the ``n`` entries with the highest counts."""

    def __init__(self, n):
        """Store how many entries ``process`` should return."""
        super().__init__()
        self.n = n

    def process(self, word_freq):
        """Return the ``n`` highest-count ``(word, count)`` pairs.

        Args:
            word_freq: Mapping from word to count.

        Returns:
            A list of ``(word, count)`` tuples, largest count first.
        """
        def count_of(entry):
            # Each entry is a (word, count) pair; rank by the count.
            return entry[1]

        pairs = word_freq.items()
        return nlargest(self.n, pairs, key=count_of)
  
  
# Input file to analyse. `testfilepath` is not defined in this file; it is
# presumably supplied by the star import from cppy.cp_util -- TODO confirm.
filename = testfilepath
n = 5  # number of most frequent words to report

# Build the pipeline: read the file, count the words, keep the top n.
pipeline = FileReader(filename) | WordFrequencyCounter() | TopNFilter(n)

# Run the pipeline.
top_n_words = pipeline.process()

# Print the result, one "word: count" line per entry.
for word, freq in top_n_words:
    print(f"{word}: {freq}")