parent
4aa6f8469d
commit
cab45b3281
@ -0,0 +1,56 @@
|
||||
import collections
|
||||
from abc import ABC, abstractmethod
|
||||
from cppy.cp_util import *
|
||||
|
||||
# Define the observer interface (not strictly required in Python)
|
||||
class Observer(ABC):
    """Abstract interface for objects that receive words from a subject.

    Subclasses must override :meth:`update`; the class itself cannot be
    instantiated because ``update`` is abstract.
    """

    @abstractmethod
    def update(self, word):
        """Handle a single word pushed by the subject.

        Args:
            word: The word being announced to this observer.
        """
        pass
|
||||
|
||||
# Concrete observer that tallies word frequencies
|
||||
class WordFrequencyObserver(Observer):
    """Concrete observer that counts how often each word is reported."""

    def __init__(self):
        # Maps each word to the number of times update() has been called with it.
        self.word_count = collections.Counter()

    def update(self, word):
        """Record one more occurrence of ``word``.

        Args:
            word: The word to count; it is used as a Counter key.
        """
        self.word_count[word] += 1

    def get_top_n(self, n):
        """Return the ``n`` most frequent words seen so far.

        Args:
            n: Maximum number of entries to return.

        Returns:
            A list of ``(word, count)`` pairs as produced by
            ``collections.Counter.most_common(n)``.
        """
        return self.word_count.most_common(n)
|
||||
|
||||
# Subject class that observers attach to
|
||||
class WordSubject:
    """Subject that forwards each announced word to its attached observers."""

    def __init__(self):
        # Observers in attachment order; notify() walks this list front to back.
        self.observers = []

    def attach(self, observer):
        """Register ``observer`` to receive subsequent notifications.

        Args:
            observer: Any object exposing an ``update(word)`` method.
                No de-duplication is done, so attaching the same object
                twice makes it receive every word twice.
        """
        self.observers.append(observer)

    def notify(self, word):
        """Pass ``word`` to the ``update`` method of every attached observer.

        Args:
            word: The word to broadcast.
        """
        for observer in self.observers:
            observer.update(word)
|
||||
|
||||
|
||||
# Main entry point
|
||||
def main(testfilepath, top_n=10):
    """Count non-stopword frequencies in a file and print the most common ones.

    Each word that passes the stopword filter is pushed through a
    ``WordSubject`` to a single ``WordFrequencyObserver``, which does the
    counting.

    Args:
        testfilepath: Passed unchanged to ``read_file``.
        top_n: How many of the most frequent words to print (default 10).

    NOTE(review): ``get_stopwords``, ``read_file``, ``re_split`` and
    ``print_word_freqs`` are not defined in this file; presumably they come
    from the ``cppy.cp_util`` star import -- confirm their exact contracts there.
    """
    stopwords = get_stopwords()
    subject = WordSubject()

    # Create one observer and attach it to the subject.
    observer = WordFrequencyObserver()
    subject.attach(observer)

    # Process the file: split its contents into words and announce every
    # word that is not a stopword.
    wordlist = re_split(read_file(testfilepath))
    for word in wordlist:
        if word not in stopwords:
            subject.notify(word)

    # Print the top-N word frequencies.
    top_words = observer.get_top_n(top_n)
    print_word_freqs(top_words)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): `testfilepath` is not defined in this file; presumably it
    # is provided by the `from cppy.cp_util import *` star import -- confirm.
    main(testfilepath)
|
Loading…
Reference in new issue