From b04a404b3a00d28ecdaf55fd1f707d2267f95684 Mon Sep 17 00:00:00 2001 From: zj3D Date: Wed, 13 Mar 2024 09:14:41 +0800 Subject: [PATCH] =?UTF-8?q?print=E7=BB=9F=E4=B8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 基本结构/041 插件/tf-20.py | 4 +--- 基本结构/051享元模式/tf-38.py | 4 ++-- 异常/1 软件不能挂掉/tf-21.py | 9 ++------- 计算设备/map-reduce/tf_92.py | 5 ++--- 计算设备/缓存/84.py | 3 +-- 语言特性/异步/tf_91.py | 5 ++--- 语言特性/装饰/tf-19A.py | 6 +++--- 7 files changed, 13 insertions(+), 23 deletions(-) diff --git a/基本结构/041 插件/tf-20.py b/基本结构/041 插件/tf-20.py index a312a34..1dbcd28 100644 --- a/基本结构/041 插件/tf-20.py +++ b/基本结构/041 插件/tf-20.py @@ -16,6 +16,4 @@ def load_plugins(): load_plugins() word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath )) -for (w, c) in word_freqs: - print(w, '-', c) - +print_word_freqs(word_freqs) \ No newline at end of file diff --git a/基本结构/051享元模式/tf-38.py b/基本结构/051享元模式/tf-38.py index 32f835d..63fbe16 100644 --- a/基本结构/051享元模式/tf-38.py +++ b/基本结构/051享元模式/tf-38.py @@ -22,7 +22,7 @@ class WordFrequencyController(): #定义具体的享元类 class ConcreteWordFrequencyController(WordFrequencyController): def __init__(self, controllertype,filepath): - self.word_list = extract_words(filepath) + self.word_list = extract_file_words(filepath) self.word_freq = get_frequencies(self.word_list) self.word_freq = sort_dict(self.word_freq) def print_word_freqs(self, number): @@ -51,7 +51,7 @@ if __name__ == "__main__": factory = WordFrequencyControllerFactory() while True: try: - number = input("请输入需要显示词频前几的单词") + number = input("请输入需要显示词频前几的单词: ") process_command(factory, number) except EOFError: break \ No newline at end of file diff --git a/异常/1 软件不能挂掉/tf-21.py b/异常/1 软件不能挂掉/tf-21.py index 08fcd7a..ebdb398 100644 --- a/异常/1 软件不能挂掉/tf-21.py +++ b/异常/1 软件不能挂掉/tf-21.py @@ -1,4 +1,4 @@ -import sys, re, operator, string, inspect +import re, operator, string from cppy.cp_util import * # @@ -30,12 +30,7 @@ def remove_stop_words(word_list): def frequencies(word_list): if type(word_list) is not list or word_list == []: return {} - word_freqs = {} - for w in word_list: - if w in word_freqs: - word_freqs[w] += 1 - else: - word_freqs[w] = 1 + word_freqs = get_frequencies( word_list ) return word_freqs def sort(word_freq): diff --git a/计算设备/map-reduce/tf_92.py b/计算设备/map-reduce/tf_92.py index 797e831..e9a63a8 100644 --- a/计算设备/map-reduce/tf_92.py +++ b/计算设备/map-reduce/tf_92.py @@ -6,9 +6,8 @@ from cppy.cp_util import * stopwords = get_stopwords() def process_chunk(chunk): - # 切词并过滤停用词 - words = re.findall(r'\w+', chunk.lower()) - words = [ word for word in words if word not in stopwords and len(word) > 2] + # 切词并过滤停用词 + words = extract_str_words( chunk.lower() ) return Counter(words) def merge_counts(counts_list): diff --git a/计算设备/缓存/84.py b/计算设备/缓存/84.py index 6cd67ad..592653d 100644 --- a/计算设备/缓存/84.py +++ b/计算设备/缓存/84.py @@ -20,8 +20,7 @@ def calculate_word_frequency(file_path): # 测试函数 top_10_words = calculate_word_frequency(testfilepath) -for word, freq in top_10_words: - print(f"{word}: {freq}") +print_word_freqs(top_10_words) ''' python 提供了一种缓存调用函数的机制 diff --git a/语言特性/异步/tf_91.py b/语言特性/异步/tf_91.py index 9ec04c1..f0a0475 100644 --- a/语言特性/异步/tf_91.py +++ b/语言特性/异步/tf_91.py @@ -40,6 +40,5 @@ if __name__ == '__main__': total_count = Counter() for result in results: total_count += result - # 打印词频最高的10个单词 - for w,c in total_count.most_common(10): - print(w, '--',c) \ No newline at end of file + # 打印词频最高的10个单词 + print_word_freqs( total_count.most_common(10) ) \ No newline at end of file diff --git a/语言特性/装饰/tf-19A.py b/语言特性/装饰/tf-19A.py index 4a64860..fe92b28 100644 --- a/语言特性/装饰/tf-19A.py +++ b/语言特性/装饰/tf-19A.py @@ -15,9 +15,9 @@ def word_frequency( top_n=10 ): word_counts[word] += 1 # 输出所有词的频率最高的n个词 - most_common = word_counts.most_common(top_n) - for w, count in most_common: - print(f"{w} - {count}") + most_common = word_counts.most_common(top_n) + util.print_word_freqs( most_common ) + return result return wrapper