|  |  |  | @ -0,0 +1,77 @@ | 
			
		
	
		
			
				
					|  |  |  |  | import os,re,string,operator | 
			
		
	
		
			
				
					|  |  |  |  | from collections import Counter | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
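# TextProcessor processes a text file and computes word frequencies. Once
# processing finishes, it notifies every registered observer via notify().
# WordFrequencyObserver is a concrete observer: its update() method receives
# the word frequencies and prints the 10 most common words.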
			
		
	
		
			
				
					|  |  |  |  | class Subject: | 
			
		
	
		
			
				
					|  |  |  |  |     def __init__(self): | 
			
		
	
		
			
				
					|  |  |  |  |         self._observers = [] | 
			
		
	
		
			
				
					|  |  |  |  |     # 不能随意改变,所以肯定是私有 | 
			
		
	
		
			
				
					|  |  |  |  |     def attach(self, observer): | 
			
		
	
		
			
				
					|  |  |  |  |         self._observers.append(observer) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def detach(self, observer): | 
			
		
	
		
			
				
					|  |  |  |  |         self._observers.remove(observer) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def notify(self, word_freqs): | 
			
		
	
		
			
				
					|  |  |  |  |         for observer in self._observers: | 
			
		
	
		
			
				
					|  |  |  |  |             observer.update(word_freqs) | 
			
		
	
		
			
				
# Subscribe, unsubscribe, and notify of updates: the Subject class maintains
# the list of subscribers (i.e. observers).
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | class Observer: | 
			
		
	
		
			
				
					|  |  |  |  |     def update(self, word_freqs): | 
			
		
	
		
			
				
					|  |  |  |  |         pass | 
			
		
	
		
			
				
# The Observer class above defines the abstract observer interface.
# The class below is a concrete Observer implementation.
			
		
	
		
			
				
					|  |  |  |  | class WordFrequencyObserver(Observer): | 
			
		
	
		
			
				
					|  |  |  |  |     def update(self, word_freqs): | 
			
		
	
		
			
				
					|  |  |  |  |         print("词频已经被更新:") | 
			
		
	
		
			
				
					|  |  |  |  |         self.print_word_freqs(word_freqs) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def print_word_freqs(self, word_freqs): | 
			
		
	
		
			
				
					|  |  |  |  |         sorted_freqs = sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True) | 
			
		
	
		
			
				
					|  |  |  |  |         for (w, c) in sorted_freqs[:10]: | 
			
		
	
		
			
				
					|  |  |  |  |             print(f"{w}: {c}") | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
# Text analysis.
			
		
	
		
			
				
					|  |  |  |  | class TextProcessor: | 
			
		
	
		
			
				
					|  |  |  |  |     def __init__(self, subject: Subject): | 
			
		
	
		
			
				
					|  |  |  |  | #subject是Subject的子类,类型注解,单独写也可以 | 
			
		
	
		
			
				
					|  |  |  |  |         self._subject = subject | 
			
		
	
		
			
				
					|  |  |  |  |         self._stop_words:str = set() | 
			
		
	
		
			
				
					|  |  |  |  | #是一个集合(其实这里需要表明是str) | 
			
		
	
		
			
				
					|  |  |  |  |     def load_stop_words(self, path_to_file): | 
			
		
	
		
			
				
					|  |  |  |  |         with open(path_to_file, encoding='utf-8') as f: | 
			
		
	
		
			
				
					|  |  |  |  |             self._stop_words = set(line.strip().lower() for line in f) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def process_text(self, path_to_file): | 
			
		
	
		
			
				
					|  |  |  |  |         with open(path_to_file, encoding='utf-8') as f: | 
			
		
	
		
			
				
					|  |  |  |  |             data = f.read() | 
			
		
	
		
			
				
					|  |  |  |  |             word_list = self.re_split(data) | 
			
		
	
		
			
				
					|  |  |  |  |             filtered_words = self.filter_words(word_list) | 
			
		
	
		
			
				
					|  |  |  |  |             word_freqs = self.count_frequencies(filtered_words) | 
			
		
	
		
			
				
					|  |  |  |  |             self._subject.notify(word_freqs) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def re_split(self, data): | 
			
		
	
		
			
				
					|  |  |  |  |         pattern = re.compile('[\W_]+') | 
			
		
	
		
			
				
					|  |  |  |  |         return pattern.sub(' ', data).lower().split() | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def filter_words(self, word_list): | 
			
		
	
		
			
				
					|  |  |  |  |         return [w for w in word_list if w not in self._stop_words and len(w) >= 3] | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     def count_frequencies(self, word_list): | 
			
		
	
		
			
				
					|  |  |  |  |         return Counter(word_list) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
# Manual test run.
			
		
	
		
			
				
					|  |  |  |  | if __name__ == "__main__": | 
			
		
	
		
			
				
					|  |  |  |  |     stopwordfilepath = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\stop_words.txt' | 
			
		
	
		
			
				
					|  |  |  |  |     testfilepath = r'C:\Users\asus\Desktop\cppy余悦批注\cppy\data\pride-and-prejudice.txt' | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     # 调用实例 | 
			
		
	
		
			
				
					|  |  |  |  |     subject = Subject() | 
			
		
	
		
			
				
					|  |  |  |  |     observer = WordFrequencyObserver() | 
			
		
	
		
			
				
					|  |  |  |  |     subject.attach(observer) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     text_processor = TextProcessor(subject) | 
			
		
	
		
			
				
					|  |  |  |  |     text_processor.load_stop_words(stopwordfilepath) | 
			
		
	
		
			
				
					|  |  |  |  |     text_processor.process_text(testfilepath) |