from cppy.cp_util import *

def extract_words(obj, path_to_file):
    """Load the file at *path_to_file* and store its split contents in *obj*.

    The file is read with ``read_file`` and the result is passed through
    ``re_split``; whatever ``re_split`` returns is written to
    ``obj['data']`` (presumably a list of words -- confirm against
    ``cppy.cp_util``).  Nothing is returned; *obj* is mutated in place.
    """
    raw_text = read_file(path_to_file)
    obj['data'] = re_split(raw_text)

def load_stop_words(obj):
    """Store the result of ``get_stopwords()`` under ``obj['stop_words']``.

    Nothing is returned; *obj* is mutated in place.
    """
    stop_words = get_stopwords()
    obj['stop_words'] = stop_words

def increment_count(obj, w):
    """Add one to the tally kept for *w* in ``obj['freqs']``.

    A word that has no entry yet starts at 1.  Nothing is returned; the
    mapping stored under ``obj['freqs']`` is updated in place.
    """
    counts = obj['freqs']
    counts[w] = counts.get(w, 0) + 1

# Holder for the input data: a plain dict whose callable entries close over
# the dict itself, so it is used like a small object.
data_storage_obj = {
    # Filled in by 'init'; empty until then.
    'data': [],
    # Populate 'data' from the file at path_to_file via extract_words.
    'init': lambda path_to_file: extract_words(
        data_storage_obj, path_to_file
    ),
    # Accessor returning whatever is currently stored under 'data'.
    'words': lambda: data_storage_obj['data'],
}

# Holder for the stop-word collection: a plain dict whose callable entries
# close over the dict itself.
stop_words_obj = {
    # Filled in by 'init'; empty until then.
    'stop_words': [],
    # Populate 'stop_words' via load_stop_words.
    'init': lambda: load_stop_words(stop_words_obj),
    # True when word is contained in the current 'stop_words' value.
    # NOTE(review): this is a linear scan while the value is a list --
    # confirm what get_stopwords() returns if lookup speed matters.
    'is_stop_word': lambda word: word in stop_words_obj['stop_words'],
}

# Holder for the word tallies: a plain dict whose callable entries close over
# the dict itself.
word_freqs_obj = {
    # Mapping of word -> count, updated through 'increment_count'.
    'freqs': {},
    # Bump the tally for w by delegating to increment_count.
    'increment_count': lambda w: increment_count(word_freqs_obj, w),
    # Hand the tallies to sort_dict and return its result (presumably
    # (word, count) pairs ordered by count -- confirm in cppy.cp_util).
    'sorted': lambda: sort_dict(word_freqs_obj['freqs']),
}


if __name__ == '__main__':
    # Load the input text and the stop words into their holder dicts.
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()

    # Tally every word that is not flagged as a stop word.
    for word in data_storage_obj['words']():
        if stop_words_obj['is_stop_word'](word):
            continue
        word_freqs_obj['increment_count'](word)

    # Print the first ten entries produced by the 'sorted' accessor.
    word_freqs = word_freqs_obj['sorted']()
    for word, count in word_freqs[:10]:
        print(word, '-', count)