"""Word-frequency pipeline built as a chain of subscribers.

Each downstream component is attached to the chain of the component that
precedes it; the only relationship provided is the abstract
``self.next_observer`` link.  A downstream component receives the
instruction and data and decides for itself what to do with them.  In
theory every component could take part in several production queues.

This example wraps messages in classes, which in theory leaves more room
for extension than plain strings would.  It is an illustrative prototype
and needs to be adapted to the concrete environment it is used in.
"""

from collections import Counter
from typing import List, Dict

from cppy.cp_util import *


# ---------------------------------------------------------------------------
# Message types
# ---------------------------------------------------------------------------
class Message:
    """Base message: a thin wrapper that carries a payload in ``data``."""

    def __init__(self, data):
        self.data = data


class TokenizedText(Message):
    """Message emitted by the tokenizer stage."""


class FilteredText(Message):
    """Message emitted by the stop-word removal stage."""


class WordFrequency(Message):
    """Message emitted by the frequency-counting stage."""


# ---------------------------------------------------------------------------
# Observer interface
# ---------------------------------------------------------------------------
class Observer:
    """Interface of a pipeline stage; the base ``notify`` does nothing."""

    def notify(self, message: Message):
        """Receive ``message``; subclasses override this to react to it."""


# ---------------------------------------------------------------------------
# Tokenizing subscriber
# ---------------------------------------------------------------------------
class TokenizerSubscriber(Observer):
    """Splits a string payload and forwards it as ``TokenizedText``."""

    def __init__(self, next_observer: Observer):
        self.next_observer = next_observer

    def notify(self, message: Message):
        """Tokenize ``message.data`` when it is a ``str``; otherwise ignore."""
        payload = message.data
        if isinstance(payload, str):
            # re_split comes from cppy.cp_util; presumably it returns the
            # words of the text -- confirm against that module.
            tokens = re_split(payload)
            self.next_observer.notify(TokenizedText(tokens))


# ---------------------------------------------------------------------------
# Stop-word subscriber
# ---------------------------------------------------------------------------
class StopWordsRemoverSubscriber(Observer):
    """Drops stop words and short words, then forwards ``FilteredText``."""

    def __init__(self, next_observer: Observer, stop_words: List[str]):
        self.next_observer = next_observer
        # Stored as a set so each membership test is a hash lookup.
        self.stop_words = set(stop_words)

    def notify(self, message: Message):
        """Filter a ``TokenizedText`` message; ignore any other message."""
        if isinstance(message, TokenizedText):
            kept = []
            for token in message.data:
                # Skip stop words and anything of two characters or fewer.
                if token in self.stop_words or len(token) <= 2:
                    continue
                kept.append(token)
            self.next_observer.notify(FilteredText(kept))


# ---------------------------------------------------------------------------
# Word-frequency subscriber
# ---------------------------------------------------------------------------
class WordFrequencyCalculatorSubscriber(Observer):
    """Counts the words of a ``FilteredText`` and forwards ``WordFrequency``."""

    def __init__(self, next_observer: Observer):
        self.next_observer = next_observer

    def notify(self, message: Message):
        """Count occurrences in ``message.data``; ignore other messages."""
        if isinstance(message, FilteredText):
            counts = Counter(message.data)
            self.next_observer.notify(WordFrequency(counts))


# ---------------------------------------------------------------------------
# Top-N display subscriber
# ---------------------------------------------------------------------------
class TopNWordsDisplaySubscriber(Observer):
    """Final stage: outputs the ``n`` most common words."""

    def __init__(self, n: int):
        self.n = n

    def notify(self, message: Message):
        """Display the top ``n`` entries of a ``WordFrequency`` message."""
        if isinstance(message, WordFrequency):
            top_entries = message.data.most_common(self.n)
            # print_word_freqs comes from cppy.cp_util; presumably it prints
            # the (word, count) pairs -- confirm against that module.
            print_word_freqs(top_entries)


# ---------------------------------------------------------------------------
# Simulated publisher
# ---------------------------------------------------------------------------
def publish_text(text: str, observers: List[Observer]):
    """Send ``text`` to every observer, each wrapped in its own ``Message``."""
    for subscriber in observers:
        subscriber.notify(Message(text))


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main():
    """Load the inputs, wire up the subscriber chain and publish the text."""
    # read_file and get_stopwords come from cppy.cp_util and are called
    # with their defaults here.
    text = read_file()
    stop_words = get_stopwords()

    # Build the chain back to front, since every stage needs its successor.
    display_stage = TopNWordsDisplaySubscriber(n=10)
    counting_stage = WordFrequencyCalculatorSubscriber(display_stage)
    filtering_stage = StopWordsRemoverSubscriber(counting_stage, stop_words)
    tokenizing_stage = TokenizerSubscriber(filtering_stage)

    # Publish the text to the head of the chain.
    publish_text(text, [tokenizing_stage])


if __name__ == "__main__":
    main()