from cppy.cp_util import *
from collections import Counter
from heapq import nlargest
import re


class Pipeline:
    """Base class for a processing stage that can be chained with ``|``.

    Subclasses override :meth:`process`. ``a | b`` builds a new stage that
    feeds the result of ``a.process(...)`` into ``b.process(...)``.
    """

    def __init__(self):
        pass

    def __or__(self, other):
        """Return a stage that runs ``self`` first and then ``other``."""
        return PipelineComposition(self, other)

    def process(self, data=None):
        """Transform ``data`` and return the result.

        ``data`` defaults to ``None`` so that a pipeline whose first stage
        produces its own input (such as :class:`FileReader`) can be started
        with a bare ``pipeline.process()``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class PipelineComposition(Pipeline):
    """Two stages run back to back; created by ``first | second``."""

    def __init__(self, first, second):
        super().__init__()
        self.first = first
        self.second = second

    def process(self, data=None):
        """Run ``first`` on ``data`` and pass its result through ``second``."""
        return self.second.process(self.first.process(data))


class FileReader(Pipeline):
    """Source stage that reads a whole UTF-8 text file."""

    def __init__(self, filename):
        super().__init__()
        self.filename = filename

    def process(self, data=None):
        """Return the full content of ``self.filename`` as a string.

        ``data`` is accepted only so this stage has the same call signature
        as every other stage (the composition always passes one argument);
        its value is ignored.
        """
        with open(self.filename, 'r', encoding='utf-8') as file:
            content = file.read()
        return content


class WordFrequencyCounter(Pipeline):
    """Stage that counts how often each word occurs in a text."""

    def process(self, text):
        """Return a ``Counter`` of the ``\\w+`` tokens of the lower-cased text."""
        words = re.findall(r'\w+', text.lower())
        word_freq = Counter(words)
        return word_freq


class TopNFilter(Pipeline):
    """Stage that keeps only the ``n`` most frequent entries."""

    def __init__(self, n):
        super().__init__()
        self.n = n

    def process(self, word_freq):
        """Return the ``n`` ``(word, count)`` pairs with the largest counts.

        ``word_freq`` is any mapping offering ``.items()``; the result is a
        list ordered from the highest count to the lowest.
        """
        return nlargest(self.n, word_freq.items(), key=lambda item: item[1])


# Text file whose content is analysed. `testfilepath` is not defined in this
# file; presumably it is provided by the `cppy.cp_util` star import above.
filename = testfilepath
n = 5  # report the 5 most frequent words

# Build the pipeline: read the file -> count words -> keep the top n.
pipeline = FileReader(filename) | WordFrequencyCounter() | TopNFilter(n)

# Run the pipeline; no input is passed because the first stage reads the file.
top_n_words = pipeline.process()

# Print the result.
for word, freq in top_n_words:
    print(f"{word}: {freq}")