From 7da9aae860f81f2568e5ce3af04c9020691f6a41 Mon Sep 17 00:00:00 2001 From: zj3D Date: Wed, 13 Mar 2024 08:51:58 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=B7=A5=E7=A8=8B=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- {异常/4 类型申明 => 工程化/类型申明}/tf-24.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {异常/4 类型申明 => 工程化/类型申明}/tf-24.py (100%) diff --git a/异常/4 类型申明/tf-24.py b/工程化/类型申明/tf-24.py similarity index 100% rename from 异常/4 类型申明/tf-24.py rename to 工程化/类型申明/tf-24.py -- 2.34.1 From b04a404b3a00d28ecdaf55fd1f707d2267f95684 Mon Sep 17 00:00:00 2001 From: zj3D Date: Wed, 13 Mar 2024 09:14:41 +0800 Subject: [PATCH 2/3] =?UTF-8?q?print=E7=BB=9F=E4=B8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 基本结构/041 插件/tf-20.py | 4 +--- 基本结构/051享元模式/tf-38.py | 4 ++-- 异常/1 软件不能挂掉/tf-21.py | 9 ++------- 计算设备/map-reduce/tf_92.py | 5 ++--- 计算设备/缓存/84.py | 3 +-- 语言特性/异步/tf_91.py | 5 ++--- 语言特性/装饰/tf-19A.py | 6 +++--- 7 files changed, 13 insertions(+), 23 deletions(-) diff --git a/基本结构/041 插件/tf-20.py b/基本结构/041 插件/tf-20.py index a312a34..1dbcd28 100644 --- a/基本结构/041 插件/tf-20.py +++ b/基本结构/041 插件/tf-20.py @@ -16,6 +16,4 @@ def load_plugins(): load_plugins() word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath )) -for (w, c) in word_freqs: - print(w, '-', c) - +print_word_freqs(word_freqs) \ No newline at end of file diff --git a/基本结构/051享元模式/tf-38.py b/基本结构/051享元模式/tf-38.py index 32f835d..63fbe16 100644 --- a/基本结构/051享元模式/tf-38.py +++ b/基本结构/051享元模式/tf-38.py @@ -22,7 +22,7 @@ class WordFrequencyController(): #定义具体的享元类 class ConcreteWordFrequencyController(WordFrequencyController): def __init__(self, controllertype,filepath): - self.word_list = extract_words(filepath) + self.word_list = extract_file_words(filepath) self.word_freq = get_frequencies(self.word_list) self.word_freq = sort_dict(self.word_freq) def print_word_freqs(self, number): @@ -51,7 +51,7 @@ if __name__ == "__main__": factory = WordFrequencyControllerFactory() while True: try: - number = input("请输入需要显示词频前几的单词") + number = input("请输入需要显示词频前几的单词: ") process_command(factory, number) except EOFError: break \ No newline at end of file diff --git a/异常/1 软件不能挂掉/tf-21.py b/异常/1 软件不能挂掉/tf-21.py index 08fcd7a..ebdb398 100644 --- a/异常/1 软件不能挂掉/tf-21.py +++ b/异常/1 软件不能挂掉/tf-21.py @@ -1,4 +1,4 @@ -import sys, re, operator, string, inspect +import re, operator, string from cppy.cp_util import * # @@ -30,12 +30,7 @@ def remove_stop_words(word_list): def frequencies(word_list): if type(word_list) is not list or word_list == []: return {} - word_freqs = {} - for w in word_list: - if w in word_freqs: - word_freqs[w] += 1 - else: - word_freqs[w] = 1 + word_freqs = get_frequencies( word_list ) return word_freqs def sort(word_freq): diff --git a/计算设备/map-reduce/tf_92.py b/计算设备/map-reduce/tf_92.py index 797e831..e9a63a8 100644 --- a/计算设备/map-reduce/tf_92.py +++ b/计算设备/map-reduce/tf_92.py @@ -6,9 +6,8 @@ from cppy.cp_util import * stopwords = get_stopwords() def process_chunk(chunk): - # 切词并过滤停用词 - words = re.findall(r'\w+', chunk.lower()) - words = [ word for word in words if word not in stopwords and len(word) > 2] + # 切词并过滤停用词 + words = extract_str_words( chunk.lower() ) return Counter(words) def merge_counts(counts_list): diff --git a/计算设备/缓存/84.py b/计算设备/缓存/84.py index 6cd67ad..592653d 100644 --- a/计算设备/缓存/84.py +++ b/计算设备/缓存/84.py @@ -20,8 +20,7 @@ def calculate_word_frequency(file_path): # 测试函数 top_10_words = calculate_word_frequency(testfilepath) -for word, freq in top_10_words: - print(f"{word}: {freq}") +print_word_freqs(top_10_words) ''' python 提供了一种缓存调用函数的机制 diff --git a/语言特性/异步/tf_91.py b/语言特性/异步/tf_91.py index 9ec04c1..f0a0475 100644 --- a/语言特性/异步/tf_91.py +++ b/语言特性/异步/tf_91.py @@ -40,6 +40,5 @@ if __name__ == '__main__': total_count = Counter() for result in results: total_count += result - # 打印词频最高的10个单词 - for w,c in total_count.most_common(10): - print(w, '--',c) \ No newline at end of file + # 打印词频最高的10个单词 + print_word_freqs( total_count.most_common(10) ) \ No newline at end of file diff --git a/语言特性/装饰/tf-19A.py b/语言特性/装饰/tf-19A.py index 4a64860..fe92b28 100644 --- a/语言特性/装饰/tf-19A.py +++ b/语言特性/装饰/tf-19A.py @@ -15,9 +15,9 @@ def word_frequency( top_n=10 ): word_counts[word] += 1 # 输出所有词的频率最高的n个词 - most_common = word_counts.most_common(top_n) - for w, count in most_common: - print(f"{w} - {count}") + most_common = word_counts.most_common(top_n) + util.print_word_freqs( most_common ) + return result return wrapper -- 2.34.1 From b1911ef9cb3e0fc025b47829d290a93dec3d50aa Mon Sep 17 00:00:00 2001 From: zj3D Date: Wed, 13 Mar 2024 09:27:15 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=B8=85=E6=B4=81=E4=BB=A3=E7=A0=8138?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 基本结构/051享元模式/tf-38.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/基本结构/051享元模式/tf-38.py b/基本结构/051享元模式/tf-38.py index 63fbe16..eab1e50 100644 --- a/基本结构/051享元模式/tf-38.py +++ b/基本结构/051享元模式/tf-38.py @@ -1,18 +1,13 @@ -# -*- encoding:utf-8 -*- -from cppy.cp_util import * ''' -享元模式 享元模式是一种结构型设计模式,在享元模式中,对象被设计为可共享的,可以被多个上下文使用,而不必在每个上下文中都创建新的对象。 +享元模式中,对象被设计为可共享的,被多个上下文使用,而不必在每个上下文中都创建新的对象。 如果我们有大量不同的词频分析需求,有时需要词频前10的单词,有时需要词频前20的单词,有时还需要限定词汇的长度,那就需要创建多个词频统计器,每个词频统 -计器都独立创建并存储其内部状态,那么系统的内存占用可能会很大,在这种情况下,享元模式共享相同类型的词频统计器对象,每种类型的词频统计器只需创建一个 -共享实例,然后通过设置不同的参数个性化每个对象,通过共享相同的内部状态,降低了对象的创建和内存占用成本。 +计器都独立创建并存储其内部状态,在这种情况下,享元模式共享相同类型的词频统计器对象,只需创建一个共享实例,然后通过设置不同的参数个性化每个对象,通过共享相同的内部状态,降低了对象的创建和内存占用成本。 例如,我需要对3个文件获取词频前十的单词,对另外3个文件获取词频前二十的单词,那么我只需要创建2个词频统计器对象,每个对象存储相同的内部状态,一个对象 获取前十的单词,一个对象获取前二十的单词,而不用创建6个对象 ''' -#以需要的词频数量分类 -# class Type(number): -def get_number(): - number = int(input("请输入需要显示词频前几的单词")) - return number + +from cppy.cp_util import * + #定义享元接口 class WordFrequencyController(): @@ -37,8 +32,7 @@ class WordFrequencyControllerFactory(): def get_WordFrequencyController(self, controller_type,testfilepath): if controller_type not in self.types: self.types[controller_type] = ConcreteWordFrequencyController(controller_type,testfilepath) - #创建新的享元对象 - print(self.types)#显示已存在的享元对象 + #创建新的享元对象 return self.types[controller_type]#重复使用已存在的享元对象 def process_command(factory: WordFrequencyControllerFactory, number: str): -- 2.34.1