import re, operator, string
from cppy.cp_util import *

#
# The functions
#
def extract_words(path_to_file):
    """Read a UTF-8 text file and return its words in lower case.

    The whole file is lower-cased and then split into maximal runs of
    regex word characters (letters, digits and underscore); everything
    else acts as a separator.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of lower-case words in file order, or an empty list when
        the file cannot be opened or read (the I/O error is printed).
    """
    try:
        with open(path_to_file, 'r', encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
        print(f"I/O error({e.errno}) when opening {path_to_file}: {e.strerror}")
        return []

    # Raw string literal: '\w' inside a plain literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    return re.findall(r'\w+', str_data.lower())


def remove_stop_words(word_list):
    """Return the words of *word_list* that are not stop words.

    The stop words are whatever ``get_stopwords()`` returns, extended with
    every single lower-case ASCII letter. If obtaining the stop words
    raises an I/O error, the error is printed and *word_list* is returned
    unfiltered.
    """
    try:
        excluded = set(get_stopwords())
    except IOError as err:
        print(f"I/O error({err.errno}) when opening stops_words.txt: {err.strerror}")
        return word_list

    # Single letters are treated as stop words too.
    excluded |= set(string.ascii_lowercase)
    return list(filter(lambda word: word not in excluded, word_list))


def frequencies(word_list):
    """Return the result of ``get_frequencies`` for a non-empty list.

    Anything that is not exactly a ``list``, as well as an empty list,
    yields an empty dict without calling ``get_frequencies``.
    """
    if type(word_list) is list and word_list:
        return get_frequencies(word_list)
    return {}

def sort(word_freq):
    """Return the (word, count) pairs of *word_freq*, highest count first.

    Args:
        word_freq: A mapping of word to count. Any ``dict`` instance is
            accepted, including subclasses such as ``collections.Counter``.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count;
        pairs with equal counts keep the mapping's iteration order.
        An empty list is returned for an empty mapping or a non-dict
        argument.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are sorted instead of being silently discarded.
    if not isinstance(word_freq, dict) or not word_freq:
        return []
    return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)


if __name__ == '__main__':
    # Run the pipeline one stage at a time: read the words, drop the stop
    # words, count what is left, order by count, then print the result.
    words = extract_words(testfilepath)
    content_words = remove_stop_words(words)
    counts = frequencies(content_words)
    word_freqs = sort(counts)
    print_word_freqs(word_freqs)