from functools import reduce
from cppy.cp_util import *

#################################################
# Functions for map reduce
#################################################
def partition(data_str, nlines):
    """Lazily yield consecutive chunks of ``data_str``, ``nlines`` lines each.

    The text is split on '\n'; each yielded chunk is the corresponding
    run of lines re-joined with '\n'. The final chunk may contain fewer
    than ``nlines`` lines.
    """
    all_lines = data_str.split('\n')
    total = len(all_lines)
    for start in range(0, total, nlines):
        chunk = all_lines[start:start + nlines]
        yield '\n'.join(chunk)

def split_words(data_str):
    """Map step: turn a chunk of text into a list of ``(word, 1)`` pairs.

    The words come from ``extract_str_words`` (provided by the star
    import at the top of the file); every word it returns is paired with
    the count 1, in the order returned.
    """
    pairs = []
    for word in extract_str_words(data_str):
        pairs.append((word, 1))
    return pairs

def regroup(pairs_list):
    """Shuffle step: group pairs from all partitions by their first element.

    Args:
        pairs_list: An iterable of iterables of pairs (for example the
            per-partition lists of ``(word, 1)`` tuples). It is consumed
            once, so a lazy iterator such as a ``map`` object is fine.

    Returns:
        A dict mapping each pair's first element to the list of all pairs
        that share it, in the order they were encountered.
    """
    mapping = {}
    for pairs in pairs_list:
        for pair in pairs:
            # Append in place instead of rebuilding the list with `+`,
            # which would copy the whole group again for every pair.
            mapping.setdefault(pair[0], []).append(pair)
    return mapping

def count_words(mapping):
    """Reduce step: collapse one group of pairs into ``(key, total)``.

    Args:
        mapping: A two-item sequence ``(key, pairs)``, such as one item
            from ``dict.items()``. Each entry of ``pairs`` carries its
            count at index 1.

    Returns:
        A tuple ``(key, total)`` where ``total`` is the sum of the counts.
        An empty ``pairs`` yields a total of 0 rather than raising.
    """
    key, pairs = mapping[0], mapping[1]
    # sum() starts from 0, so an empty group is handled gracefully.
    return (key, sum(pair[1] for pair in pairs))

def sort(word_freq):
    """Return the ``(word, count)`` pairs ordered by count, highest first.

    Args:
        word_freq: Any iterable of sequences whose item at index 1 is the
            sort key (the count). A lazy iterator is accepted.

    Returns:
        A new list sorted by the index-1 item in descending order. The
        sort is stable, so entries with equal counts keep their input
        order.
    """
    # Imported locally so the function does not depend on `operator`
    # happening to be exposed by the file's star import.
    from operator import itemgetter

    return sorted(word_freq, key=itemgetter(1), reverse=True)

if __name__ == '__main__':
    # Map-reduce word-frequency pipeline, one stage per line.
    # read_file, testfilepath and print_word_freqs come from the star
    # import at the top of the file.
    text = read_file(testfilepath)

    # Map: split the text into 200-line chunks and emit (word, 1) pairs.
    chunks = partition(text, 200)
    mapped = map(split_words, chunks)

    # Shuffle: gather all pairs belonging to the same word.
    grouped = regroup(mapped)

    # Reduce: total the counts per word, then order by frequency.
    counted = map(count_words, grouped.items())
    ranked = sort(counted)

    print_word_freqs(ranked)