"""Print the ten most frequent non-stop-words of a text file."""

import re

from cppy.cp_util import *

# Runs of non-word characters and underscores; each run is replaced by a
# single space before the text is split into words. Raw string so that
# ``\W`` reaches the regex engine instead of being parsed as a (invalid)
# string escape.
_NON_WORD_PATTERN = re.compile(r'[\W_]+')


def extractwords(str_data):
    """Return the lower-cased words of *str_data* that are not stop words.

    Every run of non-word characters or underscores is replaced by a
    space, the result is lower-cased and split on whitespace, and words
    contained in ``get_stopwords()`` are dropped. Word order and
    duplicates are preserved.
    """
    word_list = _NON_WORD_PATTERN.sub(' ', str_data).lower().split()
    # NOTE(review): get_stopwords is not defined in this file; presumably
    # it is provided by the star import from cppy.cp_util -- confirm.
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]


def frequencies(word_list):
    """Return a dict mapping each word in *word_list* to its count."""
    word_freqs = {}
    for word in word_list:
        word_freqs[word] = word_freqs.get(word, 0) + 1
    return word_freqs


def sort(word_freq):
    """Return the ``(word, count)`` pairs of *word_freq*, highest count first.

    ``sorted`` is stable, so words with equal counts keep the order in
    which they appear in *word_freq*.
    """
    return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)


if __name__ == "__main__":
    # NOTE(review): read_file and testfilepath are not defined in this
    # file; presumably they come from cppy.cp_util -- confirm.
    txtcontent = read_file(testfilepath)
    word_list = extractwords(txtcontent)
    word_freqs = frequencies(word_list)
    word_sorts = sort(word_freqs)

    # Report the ten highest counts as "word - count".
    for word, count in word_sorts[:10]:
        print(word, '-', count)