"""Print the ten most frequent non-stop words of a text file.

The script is a pipeline of procedures that communicate through the
module-level variables ``data``, ``words`` and ``word_freqs``; each step
consumes what the previous one stored.
"""
import string
from collections import Counter

# NOTE(review): `stopwordfilepath` and `testfilepath` are not defined in this
# file; presumably they are provided by this star import -- confirm.
from cppy.cp_util import *


################################
# data
################################
data = ''         # raw text of the input file
words = []        # lower-cased tokens with stop words removed
word_freqs = []   # (word, count) pairs


################################
# procedures
################################
def read_file(path_to_file: str) -> None:
    """Read the UTF-8 text file at *path_to_file* into the global ``data``."""
    global data
    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()


def extractwords() -> None:
    """Tokenize ``data`` and store the non-stop words in the global ``words``.

    The text is lower-cased and split on whitespace. Tokens listed in the
    comma-separated stop-word file, as well as single ASCII letters, are
    dropped.
    """
    global words
    # Same explicit encoding as read_file() so the result does not depend on
    # the platform's default encoding.
    with open(stopwordfilepath, encoding='utf-8') as f:
        # Strip each entry: a trailing newline (or spaces after the commas)
        # would otherwise stay attached and the entry would never match.
        stop_words = {entry.strip() for entry in f.read().split(',')}
    stop_words.update(string.ascii_lowercase)
    words = [word for word in data.lower().split() if word not in stop_words]


def frequencies() -> None:
    """Count the occurrences of each word in ``words``.

    Stores one ``(word, count)`` pair per distinct word in the global
    ``word_freqs``, in order of first appearance. The list is rebuilt on
    every call, so repeated calls do not accumulate.
    """
    global word_freqs
    word_freqs = list(Counter(words).items())


def sort() -> None:
    """Order ``word_freqs`` by descending count.

    Expects ``frequencies()`` to have filled ``word_freqs``. The sort is
    stable, so words with equal counts keep their first-appearance order,
    which is the same ordering ``Counter.most_common()`` produces.
    """
    global word_freqs
    word_freqs = sorted(word_freqs, key=lambda pair: pair[1], reverse=True)


if __name__ == "__main__":
    read_file(testfilepath)
    extractwords()
    frequencies()
    sort()

    for word, count in word_freqs[:10]:
        print(word, '-', count)