You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
1.4 KiB

import collections
from abc import ABC, abstractmethod
from cppy.cp_util import *
# Define the observer interface (not strictly required in Python)
class Observer(ABC):
    """Abstract interface for objects that are notified about each word.

    Concrete subclasses must implement :meth:`update`; the class cannot be
    instantiated directly because ``update`` is abstract.
    """

    @abstractmethod
    def update(self, word):
        """Handle one notified ``word``."""
# Concrete observer class that counts word frequencies
class WordFrequencyObserver(Observer):
    """Concrete observer that tallies how often each word is reported."""

    def __init__(self):
        # Counter yields 0 for unseen keys, so update() can increment directly.
        self.word_count = collections.Counter()

    def update(self, word):
        """Record one more occurrence of ``word``."""
        self.word_count[word] += 1

    def get_top_n(self, n):
        """Return the ``n`` most frequent words as ``(word, count)`` pairs.

        The pairs are ordered from most to least common, as produced by
        ``collections.Counter.most_common``.
        """
        return self.word_count.most_common(n)
# Subject class
class WordSubject:
    """Subject that forwards each word to every attached observer."""

    def __init__(self):
        # Observers are notified in the order in which they were attached.
        self.observers = []

    def attach(self, observer):
        """Register ``observer`` so it receives subsequent notifications."""
        self.observers.append(observer)

    def detach(self, observer):
        """Unregister ``observer``; do nothing if it is not attached."""
        if observer in self.observers:
            self.observers.remove(observer)

    def notify(self, word):
        """Call ``update(word)`` on every currently attached observer."""
        # Iterate over a snapshot so that an observer attaching or detaching
        # during its update() cannot skip or repeat others in this round.
        for observer in tuple(self.observers):
            observer.update(word)
# Main function
def main(testfilepath, top_n=10):
    """Count non-stopword frequencies in a file and print the most common.

    Args:
        testfilepath: Value passed to ``read_file`` to obtain the text.
        top_n: How many of the most frequent words to print (default 10).
    """
    stopwords = get_stopwords()
    subject = WordSubject()

    # Create one observer and attach it to the subject.
    observer = WordFrequencyObserver()
    subject.attach(observer)

    # Process the file: notify the subject for every non-stopword.
    wordlist = re_split(read_file(testfilepath))
    for word in wordlist:
        if word not in stopwords:
            subject.notify(word)  # trigger the observers

    # Print the N highest word frequencies.
    top_words = observer.get_top_n(top_n)
    print_word_freqs(top_words)
if __name__ == "__main__":
    # NOTE(review): testfilepath is not defined in this file; presumably it is
    # provided by the `from cppy.cp_util import *` star import — confirm.
    main(testfilepath)