# Term-frequency script built from plain dictionaries that act as "objects":
# each dictionary holds its own state plus lambdas that close over that
# dictionary and forward to the module-level helper functions below.
#
# NOTE(review): extract_file_words, get_stopwords, sort_dict and testfilepath
# are not defined in this file; presumably they come from the star import
# below -- confirm against cppy.cp_util.
from cppy.cp_util import *


def extract_words(obj, path_to_file):
    """Store the result of ``extract_file_words(path_to_file)`` in ``obj['data']``."""
    words = extract_file_words(path_to_file)
    obj['data'] = words


def load_stop_words(obj):
    """Store the result of ``get_stopwords()`` in ``obj['stop_words']``."""
    stop_words = get_stopwords()
    obj['stop_words'] = stop_words


def increment_count(obj, w):
    """Add one to the count kept for ``w`` in ``obj['freqs']``.

    A word that has no entry yet starts at a count of 1.
    """
    counts = obj['freqs']
    counts[w] = counts.get(w, 0) + 1


# Holds the extracted words; 'init' fills 'data', 'words' reads it back.
data_storage_obj = {
    'data': [],
    'init': lambda path_to_file: extract_words(data_storage_obj, path_to_file),
    'words': lambda: data_storage_obj['data'],
}

# Holds the stop words; 'init' fills them, 'is_stop_word' tests membership.
stop_words_obj = {
    'stop_words': [],
    'init': lambda: load_stop_words(stop_words_obj),
    'is_stop_word': lambda word: word in stop_words_obj['stop_words'],
}

# Holds the per-word counts; 'sorted' passes them to sort_dict.
word_freqs_obj = {
    'freqs': {},
    'increment_count': lambda w: increment_count(word_freqs_obj, w),
    'sorted': lambda: sort_dict(word_freqs_obj['freqs']),
}


if __name__ == '__main__':
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()

    # Count every word that is not a stop word.
    for candidate in data_storage_obj['words']():
        if stop_words_obj['is_stop_word'](candidate):
            continue
        word_freqs_obj['increment_count'](candidate)

    # Print the first ten (word, count) pairs of the sorted result.
    word_freqs = word_freqs_obj['sorted']()
    for word, count in word_freqs[:10]:
        print(word, '-', count)