"""Word-frequency counter built on the observer pattern.

``TextProcessor`` reads a text file, tokenizes it, drops stop words and short
tokens, and counts word frequencies. When processing finishes it calls
``Subject.notify`` so that every registered observer receives the result.
``WordFrequencyObserver`` is a concrete observer whose ``update`` prints the
most frequent words (the top 10 by default).
"""

import operator
import os
import re
import string
import sys
from collections import Counter
from typing import Iterable, List, Mapping, Optional, Sequence, Set

# One or more characters that are not letters/digits (``\W``) or underscores.
# Compiled once at import time; written as a raw string so that ``\W`` is not
# treated as an (invalid) string escape sequence.
_NON_WORD_RE = re.compile(r'[\W_]+')

# Default input files used when the script is started without arguments.
DEFAULT_STOP_WORDS_PATH = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\stop_words.txt'
DEFAULT_TEXT_PATH = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\pride-and-prejudice.txt'


class Subject:
    """Maintain a list of subscribers (observers) and broadcast updates to them.

    Supports the three classic operations: subscribe (``attach``),
    unsubscribe (``detach``) and announce an update (``notify``).
    """

    def __init__(self):
        # Private by convention: callers should go through attach/detach
        # rather than mutate the list directly.
        self._observers: List["Observer"] = []

    def attach(self, observer: "Observer") -> None:
        """Register *observer* so it receives future notifications."""
        self._observers.append(observer)

    def detach(self, observer: "Observer") -> None:
        """Unregister *observer*.

        Raises:
            ValueError: if *observer* is not currently attached.
        """
        self._observers.remove(observer)

    def notify(self, word_freqs) -> None:
        """Call ``update(word_freqs)`` on every attached observer, in order.

        Iterates over a snapshot of the observer list so that an observer
        which attaches or detaches observers from inside ``update`` does not
        cause other observers to be skipped during this round.
        """
        for observer in tuple(self._observers):
            observer.update(word_freqs)


class Observer:
    """Base observer; subclasses override ``update`` to react to results."""

    def update(self, word_freqs) -> None:
        """Receive new word frequencies. The base implementation does nothing."""
        pass


class WordFrequencyObserver(Observer):
    """Concrete observer that prints the most frequent words."""

    def update(self, word_freqs: Mapping[str, int]) -> None:
        """Print a header line followed by the most frequent words."""
        print("词频已经被更新:")
        self.print_word_freqs(word_freqs)

    def print_word_freqs(self, word_freqs: Mapping[str, int], top_n: int = 10) -> None:
        """Print up to *top_n* ``word: count`` lines, highest count first.

        Args:
            word_freqs: mapping of word to occurrence count.
            top_n: maximum number of entries to print (defaults to 10).
        """
        # sorted() is stable, so words with equal counts keep the order in
        # which they appear in *word_freqs*.
        sorted_freqs = sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
        for (w, c) in sorted_freqs[:top_n]:
            print(f"{w}: {c}")


class TextProcessor:
    """Analyze a text file and publish its word frequencies via a ``Subject``."""

    def __init__(self, subject: Subject):
        # The subject used to broadcast results once a text has been processed.
        self._subject = subject
        # Lower-cased stop words; empty until load_stop_words() is called.
        self._stop_words: Set[str] = set()

    def load_stop_words(self, path_to_file) -> None:
        """Load stop words from a UTF-8 text file, replacing any previous set.

        Entries may be separated by newlines and/or commas. Each entry is
        stripped of surrounding whitespace and lower-cased; empty entries are
        ignored.

        Raises:
            OSError: if the file cannot be opened.
        """
        stop_words: Set[str] = set()
        with open(path_to_file, encoding='utf-8') as f:
            for line in f:
                # Tokens produced by re_split() never contain a comma, so an
                # entry containing one could never match; splitting on commas
                # makes comma-separated stop-word lists work as well.
                for entry in line.split(','):
                    entry = entry.strip().lower()
                    if entry:
                        stop_words.add(entry)
        self._stop_words = stop_words

    def process_text(self, path_to_file) -> None:
        """Read a UTF-8 text file, count its words and notify the subject.

        The text is tokenized with ``re_split``, filtered with
        ``filter_words`` and counted with ``count_frequencies``; the resulting
        counter is passed to ``Subject.notify``.

        Raises:
            OSError: if the file cannot be opened.
        """
        with open(path_to_file, encoding='utf-8') as f:
            data = f.read()
        word_list = self.re_split(data)
        filtered_words = self.filter_words(word_list)
        word_freqs = self.count_frequencies(filtered_words)
        self._subject.notify(word_freqs)

    def re_split(self, data: str) -> List[str]:
        """Return the lower-cased words of *data*.

        Every run of non-word characters or underscores is treated as a
        single separator.
        """
        return _NON_WORD_RE.sub(' ', data).lower().split()

    def filter_words(self, word_list: Iterable[str]) -> List[str]:
        """Return the words that are not stop words and have at least 3 characters."""
        return [w for w in word_list if w not in self._stop_words and len(w) >= 3]

    def count_frequencies(self, word_list: Iterable[str]) -> Counter:
        """Return a ``Counter`` mapping each word to its number of occurrences."""
        return Counter(word_list)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the word-frequency demo.

    Args:
        argv: optional ``[stop_words_path, text_path]``; defaults to the
            command-line arguments. Any path that is not supplied falls back
            to the corresponding ``DEFAULT_*`` constant.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    stopwordfilepath = args[0] if len(args) > 0 else DEFAULT_STOP_WORDS_PATH
    testfilepath = args[1] if len(args) > 1 else DEFAULT_TEXT_PATH

    # Wire one printing observer to the subject, then run the analysis.
    subject = Subject()
    observer = WordFrequencyObserver()
    subject.attach(observer)

    text_processor = TextProcessor(subject)
    text_processor.load_stop_words(stopwordfilepath)
    text_processor.process_text(testfilepath)


if __name__ == "__main__":
    main()