from functools import reduce
from cppy.cp_util import *
from collections import Counter


def partition(data_str, nlines):
    """Split the text into chunks of nlines lines each."""
    lines = data_str.split('\n')
    for i in range(0, len(lines), nlines):
        yield '\n'.join(lines[i:i + nlines])


def split_words(data_str):
    """Map step: extract the words of one chunk and count them."""
    word_list = extract_str_words(data_str)
    return Counter(word_list)


def count_words(pairs_list_1, pairs_list_2):
    """Reduce step: merge two Counters by adding their counts."""
    return pairs_list_1 + pairs_list_2


if __name__ == '__main__':
    data = read_file(testfilepath)

    # Map: apply split_words to each 200-line partition of the text
    splits = map(split_words, partition(data, 200))
    splits_list = list(splits)

    # Reduce: combine the per-partition counts with count_words,
    # then sort and print the overall word frequencies
    word_freqs = sort_dict(reduce(count_words, splits_list, Counter()))
    print_word_freqs(word_freqs)