from collections import Counter

from cppy.cp_util import *


def extract_words(path_to_file):
    """Read a UTF-8 text file and return its contents split by ``re_split``.

    Args:
        path_to_file: Non-empty string path of the file to read.

    Returns:
        Whatever ``re_split`` returns for the file's text (presumably a
        list of words -- confirm against ``cppy.cp_util``).

    Raises:
        AssertionError: If ``path_to_file`` is not a string or is empty.
        IOError: If the file cannot be opened or read; the error is
            reported on stdout and then re-raised.
    """
    # Explicit raises instead of ``assert`` so the checks survive
    # ``python -O``; AssertionError is kept so the failure type and message
    # match what callers saw before.
    if not isinstance(path_to_file, str):
        raise AssertionError("Must be a string!")
    if not path_to_file:
        raise AssertionError("Must be a non-empty string!")

    try:
        with open(path_to_file, encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
        print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror))
        # Bare ``raise`` re-raises the same exception with its traceback.
        raise

    return re_split(str_data)


def remove_stop_words(word_list):
    """Return the words of ``word_list`` that are not stop words.

    The stop words are obtained from ``get_stopwords()``.

    Args:
        word_list: A list of words (assumed hashable, e.g. strings).

    Returns:
        A new list with the stop words removed; order and duplicates of the
        remaining words are preserved.

    Raises:
        AssertionError: If ``word_list`` is not a list.
        IOError: If ``get_stopwords()`` fails with an I/O error; the error is
            reported on stdout and then re-raised.
    """
    # Explicit raise so the check is not stripped under ``python -O``.
    if not isinstance(word_list, list):
        raise AssertionError("Must be a list!")

    try:
        stop_words = get_stopwords()
    except IOError as e:
        print("I/O error({0}) opening stops_words.txt: {1}".format(e.errno, e.strerror))
        raise

    # Build the lookup set once so each membership test is a hash lookup
    # instead of a scan, in case get_stopwords() returns a sequence
    # (NOTE(review): assumes the stop words are hashable -- confirm).
    stop_set = frozenset(stop_words)
    return [w for w in word_list if w not in stop_set]


def frequencies(word_list):
    """Count how many times each item occurs in ``word_list``.

    Returns:
        A ``collections.Counter`` mapping each item to its count.
    """
    return Counter(word_list)


def sort(word_freq):
    """Return the result of ``word_freq.most_common()``.

    For a ``collections.Counter`` this is a list of ``(item, count)`` pairs
    ordered from the most common item to the least common.
    """
    return word_freq.most_common()


if __name__ == '__main__':
    # Pipeline: read -> drop stop words -> count -> order by frequency.
    word_freqs = sort(frequencies(remove_stop_words(extract_words(testfilepath))))
    print_word_freqs(word_freqs)