You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
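'''
An introductory example, meant to help in understanding the other examples.

The subject hooks observers onto its own processing queue.
At the appropriate moment it calls the agreed-upon ``update`` method of
every observer on that queue.
If an observer has several roles and takes part in different task chains,
the method does not have to be uniformly named ``update``.

This is an illustrative prototype; it needs adjusting for a concrete
environment.
'''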
|
|
|
|
|
|
|
|
|
|
import collections
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
|
|
|
|
|
# Define the observer interface; this is not strictly required in Python
|
|
|
|
|
class Observer(ABC):
    """Interface for objects that want to be told about each word.

    ``update`` is declared with ``@abstractmethod``, so ``Observer`` itself,
    and any subclass that does not override ``update``, cannot be
    instantiated.
    """

    @abstractmethod
    def update(self, word):
        """Handle one notification.

        Args:
            word: The item being announced to this observer.
        """
|
|
|
|
|
|
|
|
|
|
# Define the concrete observer class, used to count word frequencies
|
|
|
|
|
class WordFrequencyObserver(Observer):
    """Observer that tallies how many times each word is reported to it."""

    def __init__(self):
        # Maps word -> number of update() calls received for that word.
        # The attribute is read directly by code outside the class, so its
        # name is part of the interface.
        self.word_count = collections.Counter()

    def update(self, word):
        """Record one more occurrence of ``word``.

        Args:
            word: The key whose count is incremented by one.
        """
        self.word_count[word] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Define the subject class
|
|
|
|
|
class WordSubject:
    """Subject that broadcasts words to its registered observers."""

    def __init__(self):
        # Observers in attachment order; notify() walks this list as-is.
        self.observers = []

    def attach(self, observer):
        """Register ``observer`` to receive future notifications.

        Args:
            observer: Any object with an ``update(word)`` method. No
                de-duplication is done, so attaching the same object twice
                means it is called twice per notification.
        """
        self.observers.append(observer)

    def notify(self, word):
        """Call ``update(word)`` on every attached observer, in order.

        Args:
            word: The value passed through unchanged to each observer.
        """
        for observer in self.observers:
            observer.update(word)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Main function
|
|
|
|
|
def main(testfilepath, top_n=10):
    """Count word frequencies via the observer pattern and print the top ones.

    Args:
        testfilepath: Handed to ``read_file``; presumably the path of the
            text file to analyse.
        top_n: How many of the most frequent words to print.
    """
    # NOTE(review): get_stopwords, read_file, re_split and print_word_freqs
    # are not defined in this file; presumably they come from the
    # ``cppy.cp_util`` star import -- confirm their exact contracts there.
    stopwords = get_stopwords()
    subject = WordSubject()

    # Create one observer and attach it to the subject.
    observer = WordFrequencyObserver()
    subject.attach(observer)

    # Process the file: every word that is not a stop word is announced.
    wordlist = re_split(read_file(testfilepath))
    for word in wordlist:
        if word not in stopwords:
            subject.notify(word)  # notify the attached observers

    # Print the N most frequent words.
    top_words = observer.word_count.most_common(top_n)
    print_word_freqs(top_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # NOTE(review): ``testfilepath`` is not defined in this file; presumably
    # it is supplied by ``from cppy.cp_util import *`` -- confirm.
    main(testfilepath)
|