From 028c7ddb076b73ab8a0200b067f9112745344fb6 Mon Sep 17 00:00:00 2001
From: pbr4nzfkh <18879212807@163.com>
Date: Sun, 17 Mar 2024 10:20:49 +0800
Subject: [PATCH] =?UTF-8?q?Delete=20'=E8=AE=A1=E7=AE=97=E8=AE=BE=E5=A4=87/?=
 =?UTF-8?q?map-reduce/tf-31.py'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 计算设备/map-reduce/tf-31.py | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 计算设备/map-reduce/tf-31.py

diff --git a/计算设备/map-reduce/tf-31.py b/计算设备/map-reduce/tf-31.py
deleted file mode 100644
index 0efdcd5..0000000
--- a/计算设备/map-reduce/tf-31.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from functools import reduce
-from cppy.cp_util import *
-from collections import Counter
-
-def partition(data_str, nlines):
-    lines = data_str.split('\n')
-    for i in range(0, len(lines), nlines):
-        yield '\n'.join(lines[i:i+nlines])
-
-def split_words(data_str):
-    word_list = extract_str_words(data_str)
-    return Counter( word_list )
-
-def count_words(pairs_list_1, pairs_list_2):
-    return pairs_list_1 + pairs_list_2
-
-
-if __name__ == '__main__':
-    data = read_file(testfilepath)
-
-    # Map step: process each partition with split_words
-    splits = map(split_words, partition(data, 200))
-    splits_list = list(splits)
-
-    # Reduce step: merge the per-partition counts into overall word frequencies
-    word_freqs = sort_dict(reduce(count_words, splits_list, Counter()) )
-    print_word_freqs(word_freqs)
\ No newline at end of file
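
For context, the deleted tf-31.py computed word frequencies in map-reduce style: partition the input into chunks of lines, map each chunk to a Counter of its words, then reduce the partial Counters by addition. The following is a minimal standalone sketch of that flow using only the standard library; the input file name 'sample.txt' and the regex tokenizer are placeholders for the cppy.cp_util helpers (read_file, extract_str_words, sort_dict, print_word_freqs) used by the original script, not the project's actual utilities.

# Standalone sketch of the map-reduce word count that tf-31.py implemented.
# 'sample.txt' and the regex tokenizer below are assumed stand-ins for the
# cppy.cp_util helpers; only the overall map/reduce structure mirrors tf-31.py.
import re
from collections import Counter
from functools import reduce

def partition(data_str, nlines):
    # Split the text into chunks of nlines lines each.
    lines = data_str.split('\n')
    for i in range(0, len(lines), nlines):
        yield '\n'.join(lines[i:i + nlines])

def split_words(chunk):
    # Map step: count the words in one chunk.
    return Counter(re.findall(r'[a-z]+', chunk.lower()))

def count_words(counts_a, counts_b):
    # Reduce step: merge two partial counts (Counter supports '+').
    return counts_a + counts_b

if __name__ == '__main__':
    with open('sample.txt', encoding='utf-8') as f:  # hypothetical input file
        data = f.read()
    partial_counts = map(split_words, partition(data, 200))
    word_freqs = reduce(count_words, partial_counts, Counter())
    for word, freq in word_freqs.most_common(25):
        print(word, freq)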