import string
from collections import Counter
from cppy.cp_util import *

# Shared mutable state (the "global variables" programming style):
data = []        # characters of the input file(s)
words = []       # normalized, stop-word-filtered words
word_freqs = []  # (word, count) pairs; sorted most-frequent-first by sort()

################################
# procedures
################################


def read_file(path_to_file):
    """Append every character of *path_to_file* to the global ``data`` list."""
    global data
    with open(path_to_file, encoding='utf-8') as f:
        data = data + list(f.read())


def filter_chars_and_normalize():
    """Lower-case ``data``, split it into words, and drop stop words.

    Reads the stop-word list from ``stopwordfilepath`` (comma-separated,
    provided by cppy.cp_util) and extends the global ``words`` list.
    """
    global data
    global words
    # Replace non-alphanumeric characters with spaces so split() finds words.
    for i in range(len(data)):
        data[i] = ' ' if not data[i].isalnum() else data[i].lower()
    data_str = ''.join(data)
    words = words + data_str.split()
    with open(stopwordfilepath) as f:
        stop_words = set(f.read().split(','))
    # Single letters are treated as noise as well.
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]


def frequencies():
    """Record one ``(word, 1)`` occurrence pair per word into ``word_freqs``."""
    global words
    global word_freqs
    word_freqs.extend([(word, 1) for word in words])


def sort():
    """Aggregate the occurrence pairs in ``word_freqs`` into sorted counts.

    BUG FIX: the original recomputed counts directly from ``words`` with
    ``Counter(words)``, silently discarding the pairs accumulated by
    frequencies() and making that stage dead code. Aggregating the pairs
    yields the identical result when the pipeline runs in order, but
    sort() now actually consumes the output of the previous stage.
    """
    global word_freqs
    counts = Counter()
    for word, n in word_freqs:
        counts[word] += n
    word_freqs = counts.most_common()


if __name__ == "__main__":
    read_file(testfilepath)
    filter_chars_and_normalize()
    frequencies()
    sort()
    # Print the ten most frequent words.
    for tf in word_freqs[:10]:
        print(tf[0], '-', tf[1])