"""Word-frequency script written in a "kick-forward" (continuation-passing) style.

Every stage takes its input plus a ``func`` argument, computes its own
result, and then calls ``func(result, next_stage)`` instead of returning.
Starting from ``read_file(testfilepath, filter_chars)`` the call chain is:

    read_file -> filter_chars -> normalize -> scan -> remove_stop_words
    -> frequencies -> sort -> print_text -> no_op
"""
import operator
import re

# Supplies get_frequencies, get_stopwords, print_word_freqs and testfilepath,
# which are used below but not defined in this file.
from cppy.cp_util import *


def print_text(word_freqs, func):
    """Hand ``word_freqs`` to ``print_word_freqs``, then call ``func(None)``.

    Args:
        word_freqs: The value forwarded unchanged to ``print_word_freqs``.
        func: Final continuation; invoked with the single argument ``None``.
    """
    print_word_freqs(word_freqs)
    func(None)


def frequencies(word_list, func):
    """Compute frequencies for ``word_list`` and forward them.

    Args:
        word_list: Words passed to ``get_frequencies``.
        func: Continuation called as ``func(frequencies, print_text)``.
    """
    wf = get_frequencies(word_list)
    func(wf, print_text)


def scan(str_data, func):
    """Split ``str_data`` on whitespace and forward the resulting word list.

    Args:
        str_data: Text to split.
        func: Continuation called as ``func(words, frequencies)``.
    """
    func(str_data.split(), frequencies)


def filter_chars(str_data, func):
    """Replace each run of non-word characters or underscores with one space.

    Args:
        str_data: Raw text to clean.
        func: Continuation called as ``func(cleaned_text, scan)``.
    """
    # Raw string: '\W' is not a valid escape in a normal string literal and
    # triggers a SyntaxWarning on recent Python versions.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)


def remove_stop_words(word_list, func):
    """Drop every word contained in the collection from ``get_stopwords``.

    Args:
        word_list: Words to filter.
        func: Continuation called as ``func(filtered_words, sort)``.
    """
    stop_words = get_stopwords()
    func([w for w in word_list if w not in stop_words], sort)


def sort(wf, func):
    """Order the ``(key, value)`` items of ``wf`` by value, largest first.

    Args:
        wf: Object exposing ``.items()`` (the result of ``get_frequencies``
            in this pipeline).
        func: Continuation called as ``func(sorted_items, no_op)``.
    """
    func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)


def no_op(func):
    """Terminate the chain: accept one argument and do nothing."""
    return


def normalize(str_data, func):
    """Lower-case ``str_data`` and forward it.

    Args:
        str_data: Text to normalize.
        func: Continuation called as ``func(lowercased_text, remove_stop_words)``.
    """
    func(str_data.lower(), remove_stop_words)


def read_file(path_to_file, func):
    """Read the whole file as UTF-8 text and forward its contents.

    Args:
        path_to_file: Path of the file to read.
        func: Continuation called as ``func(file_contents, normalize)``.
    """
    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()
    func(data, normalize)


if __name__ == "__main__":
    read_file(testfilepath, filter_chars)