import re
from collections import Counter

from cppy.cp_util import *


def filter_chars_and_normalize(str_data):
    """Split text into lowercase words and drop stop words.

    Every run of non-word characters and underscores is replaced by a
    single space, the result is lower-cased and split on whitespace, and
    every word found in the collection returned by ``get_stopwords()`` is
    removed.

    Args:
        str_data: The raw text to tokenize.

    Returns:
        A list of the remaining words in their original order; repeated
        words are kept.
    """
    # Raw string: in a plain literal '\W' is an invalid escape sequence,
    # which newer Python versions warn about. The pattern itself is unchanged.
    pattern = re.compile(r'[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
    return [w for w in word_list if w not in stop_words]


def frequencies(word_list):
    """Count how often each item occurs in *word_list*.

    Args:
        word_list: An iterable of hashable items (typically words).

    Returns:
        A plain ``dict`` mapping each distinct item to its number of
        occurrences. Keys appear in the order the items were first seen.

    Raises:
        TypeError: If *word_list* is not iterable or contains an
            unhashable item.
    """
    # iter() guarantees Counter counts the elements themselves: handed a
    # mapping directly, Counter would copy its values as counts instead of
    # counting its keys, and handed None it would silently return empty.
    return dict(Counter(iter(word_list)))


def sort(word_freq):
    """Rank the entries of a frequency mapping by count, highest first.

    Args:
        word_freq: A mapping from word to count.

    Returns:
        A new list of ``(word, count)`` tuples in descending count order.
        Entries with equal counts keep the mapping's iteration order,
        because the sort is stable.
    """
    def count_of(pair):
        # Second element of each (word, count) pair is the sort key.
        return pair[1]

    ranked = list(word_freq.items())
    ranked.sort(key=count_of, reverse=True)
    return ranked


def print_all(word_freqs, n=10):
    """Print the leading entries of *word_freqs* as ``word - count`` lines.

    Args:
        word_freqs: A sliceable sequence of ``(word, count)`` pairs.
        n: Upper bound of the slice taken from the front of the sequence
            (default 10).
    """
    leading = word_freqs[:n]
    for entry in leading:
        word, freq = entry
        print(word, '-', freq)


if __name__ == "__main__":
    # Pipeline: read text -> tokenize and filter -> count -> rank -> print.
    # read_file and testfilepath are not defined in this file; presumably
    # they are supplied by the cppy.cp_util star import.
    text = read_file(testfilepath)
    words = filter_chars_and_normalize(text)
    ranked = sort(frequencies(words))
    print_all(ranked)