From e993c23ed1b55dae5aa5b4aea80b633ff2e1725a Mon Sep 17 00:00:00 2001 From: pbr4nzfkh <18879212807@163.com> Date: Sun, 17 Mar 2024 10:24:30 +0800 Subject: [PATCH] =?UTF-8?q?Delete=20'=E8=AE=A1=E7=AE=97=E8=AE=BE=E5=A4=87/?= =?UTF-8?q?map-reduce/tf-32.py'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 计算设备/map-reduce/tf-32.py | 37 -------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 计算设备/map-reduce/tf-32.py diff --git a/计算设备/map-reduce/tf-32.py b/计算设备/map-reduce/tf-32.py deleted file mode 100644 index 52bbca1..0000000 --- a/计算设备/map-reduce/tf-32.py +++ /dev/null @@ -1,37 +0,0 @@ -from functools import reduce -from cppy.cp_util import * - -################################################# -# Functions for map reduce -################################################# -def partition(data_str, nlines): - lines = data_str.split('\n') - for i in range(0, len(lines), nlines): - yield '\n'.join(lines[i:i+nlines]) - -def split_words(data_str): - words = extract_str_words(data_str) - return [ (w, 1) for w in words ] - -def regroup(pairs_list): - mapping = {} - for pairs in pairs_list: - for p in pairs: - mapping[p[0]] = mapping.get(p[0], []) + [p] - return mapping - -def count_words(mapping): - def add(x, y): return x+y - return ( mapping[0], - reduce(add, (pair[1] for pair in mapping[1])) - ) - -def sort (word_freq): - return sorted(word_freq, key=operator.itemgetter(1), reverse=True) - -if __name__ == '__main__': - data = read_file(testfilepath) - splits = map(split_words, partition(data, 200)) - splits_per_word = regroup(splits) - word_freqs = sort(map(count_words, splits_per_word.items())) - print_word_freqs(word_freqs) \ No newline at end of file