From ebe28f7670ac0d58d7e7c278b547a3bc8006e058 Mon Sep 17 00:00:00 2001 From: zj3D Date: Fri, 22 Mar 2024 09:46:53 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=2012?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{24A.py => 参数类型检查.py} | 0 .../{24B.py => 参数类型申明.py} | 0 15 工程化/类属性 .py | 36 +++++++++++++++++++ 16 其它/对象设计模式/享元.py | 5 +-- 16 其它/对象设计模式/观察者.py | 6 ++-- 5 files changed, 41 insertions(+), 6 deletions(-) rename 15 工程化/3 类型申明/{24A.py => 参数类型检查.py} (100%) rename 15 工程化/3 类型申明/{24B.py => 参数类型申明.py} (100%) create mode 100644 15 工程化/类属性 .py diff --git a/15 工程化/3 类型申明/24A.py b/15 工程化/3 类型申明/参数类型检查.py similarity index 100% rename from 15 工程化/3 类型申明/24A.py rename to 15 工程化/3 类型申明/参数类型检查.py diff --git a/15 工程化/3 类型申明/24B.py b/15 工程化/3 类型申明/参数类型申明.py similarity index 100% rename from 15 工程化/3 类型申明/24B.py rename to 15 工程化/3 类型申明/参数类型申明.py diff --git a/15 工程化/类属性 .py b/15 工程化/类属性 .py new file mode 100644 index 0000000..a7b0578 --- /dev/null +++ b/15 工程化/类属性 .py @@ -0,0 +1,36 @@ +from cppy.cp_util import * +from dataclasses import dataclass +from collections import Counter +import re + +@dataclass +class WordFrequency: + text: str + stop_words: set = None + + def __post_init__(self): + # 如果未提供停用词表 + if self.stop_words is None: + self.stop_words = get_stopwords() + + def tokenize(self): + # 分词并去除停用词 + words = re.findall(r'\b\w+\b', self.text.lower()) + filtered_words = [word for word in words if word not in self.stop_words and len(word)>2] + return filtered_words + + def get_top_n(self, n=10): + # 计算词频 + word_freqs = Counter(self.tokenize()) + return word_freqs.most_common(n) + + +# 使用示例 +if __name__ == '__main__': + # 创建WordFrequency实例 + text = read_file() + word_freq = WordFrequency( text ) + + # 获取并打印词频 + top_words = word_freq.get_top_n() + print_word_freqs(top_words) \ No newline at end of file diff --git a/16 其它/对象设计模式/享元.py b/16 其它/对象设计模式/享元.py index baa00c1..c70e464 100644 --- a/16 其它/对象设计模式/享元.py +++ b/16 其它/对象设计模式/享元.py @@ -19,9 +19,9 @@ class WordFrequencyControllerFactory(): def get_WordFrequencyController(self, number,testfilepath): if number not in self.types: self.types[number] = WordFrequencyController(number,testfilepath) # 创建新的对象 - print('new obj',number,'*'*30) + print('new obj: ','*'*30,number) else: - print('ref obj','*'*30) + print('ref obj: ','*'*30,number) return self.types[number] # 重复使用已存在的对象 @@ -29,5 +29,6 @@ if __name__ == "__main__": factory = WordFrequencyControllerFactory() for number in [ 1,3,5,3,5,7 ]: WordFrequency = factory.get_WordFrequencyController(number,testfilepath) + # print(flush=True) WordFrequency.print_word_freqs() \ No newline at end of file diff --git a/16 其它/对象设计模式/观察者.py b/16 其它/对象设计模式/观察者.py index 28628c4..9f6c962 100644 --- a/16 其它/对象设计模式/观察者.py +++ b/16 其它/对象设计模式/观察者.py @@ -15,9 +15,7 @@ class WordFrequencyObserver(Observer): def update(self, word): self.word_count[word] += 1 - - def get_top_n(self,n): - return self.word_count.most_common(n) + # 定义主题类 class WordSubject: @@ -48,7 +46,7 @@ def main(testfilepath, top_n = 10 ): subject.notify(word) # 触发 # 打印最高的N个词频 - top_words = observer.get_top_n(top_n) + top_words = observer.word_count.most_common(top_n) print_word_freqs(top_words)