|
|
|
|
|
|
|
|
|
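'''
This example is again built on the Observer pattern.

The base class Subject provides the mechanism for registering a handler and
for notifying the registered handler.

Because the handler function and its arguments are passed along mixed
together, the way each module processes a notification is effectively
decided case by case.
'''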
|
|
|
|
|
|
|
|
|
|
from collections import Counter
|
|
|
|
|
from typing import List
|
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
|
|
|
|
|
class Subject:
    """Observer-style base class that forwards its data to one handler.

    A single callable, plus optional extra positional/keyword arguments, is
    stored by :meth:`register_handler`.  :meth:`notify` then invokes that
    callable as ``handler(self.data, *extra_args, **extra_kwargs)``.

    Subclasses are expected to assign ``self.data`` before delegating to
    :meth:`notify`; this base class never sets it.
    """

    # Class-level default so that notifying a subject which never had a
    # handler registered is a harmless no-op instead of an AttributeError.
    handler = None

    def register_handler(self, handler: callable, *args, **kwargs):
        """Store the downstream handler and the extra arguments to pass it.

        Args:
            handler: Callable invoked by :meth:`notify`.  Registering again
                replaces the previous handler (only one is kept).
            *args: Extra positional arguments appended after ``self.data``.
            **kwargs: Extra keyword arguments passed to the handler.
        """
        self.handler = handler
        self.args = args
        self.kwargs = kwargs

    def notify(self, *args, **kwargs):
        """Call the registered handler with the current data.

        The arguments given to ``notify`` itself are accepted for signature
        compatibility with subclasses but are not forwarded; the handler
        receives ``self.data`` followed by the arguments captured at
        registration time.  Does nothing when no handler is registered.

        Raises:
            AttributeError: If a handler is registered but ``self.data`` has
                not been assigned yet.
        """
        if self.handler is None:
            # No observer attached: nothing to deliver.
            return
        self.handler(self.data, *self.args, **self.kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 组件一:TextLoader - 负责读取文本并过滤停用词
|
|
|
|
|
class TextLoader(Subject):
    """First pipeline stage: turns a file name into a list of words."""

    def load_text(self, filename: str) -> List[str]:
        """Return the words extracted from ``filename``.

        All the work is delegated to ``extract_file_words``.
        NOTE(review): the surrounding comments say stop words are filtered
        here; that would happen inside ``extract_file_words`` - confirm.
        """
        words = extract_file_words(filename)
        return words

    def notify(self, *args, **kwargs):
        """Load the file named by ``args[0]`` and pass the words downstream.

        The word list is stored on ``self.data`` and then handed to
        ``Subject.notify`` together with the original arguments.

        Raises:
            IndexError: If no positional argument (the file name) is given.
        """
        self.data = self.load_text(args[0])
        super().notify(self.data, *args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 组件二:WordCounter - 计算词频
|
|
|
|
|
class WordCounter(Subject):
    """Second pipeline stage: computes word frequencies."""

    def count_words(self, words: List[str]) -> dict:
        """Return a ``Counter`` mapping each word in ``words`` to its count."""
        frequencies = Counter(words)
        return frequencies

    def notify(self, *args, **kwargs):
        """Count the words given as ``args[0]`` and pass the result downstream.

        The resulting ``Counter`` is stored on ``self.data`` and then handed
        to ``Subject.notify`` together with the original arguments.

        Raises:
            IndexError: If no positional argument (the word list) is given.
        """
        self.data = self.count_words(args[0])
        super().notify(self.data, *args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Component 3: TopWordsPresenter - sorts by frequency and prints the top n words (n is supplied at registration; main passes 10)
|
|
|
|
|
class TopWordsPresenter(Subject):
    """Final pipeline stage: prints the most frequent words."""

    def notify(self, words, *args, **kwargs):
        """Print the ``args[0]`` most common entries of ``words``.

        Args:
            words: Object providing ``most_common(n)`` (a ``Counter`` in this
                file's pipeline).
            *args: ``args[0]`` is the number of entries to print.
            **kwargs: Accepted but unused.

        Raises:
            IndexError: If the entry count is not supplied.
        """
        limit = args[0]
        print_word_freqs(words.most_common(limit))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 主程序逻辑
|
|
|
|
|
def main():
    """Wire the three stages together and run them on ``testfilepath``."""
    text_stage = TextLoader()
    count_stage = WordCounter()
    report_stage = TopWordsPresenter()

    # Register the event handlers: loader -> counter -> presenter.
    # The presenter additionally receives 10 as the number of words to show.
    count_stage.register_handler(report_stage.notify, 10)
    text_stage.register_handler(count_stage.notify)

    # Trigger loading of the text, which starts the whole chain.
    # NOTE(review): testfilepath is not defined in this file; presumably it
    # comes from the cppy.cp_util star import - confirm.
    text_stage.notify(testfilepath)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|