import string
from collections import Counter
from cppy.cp_util import *

# Shared mutable state (the "global variables" programming style):
data = []        # characters of the input file(s)
words = []       # normalized, stop-word-filtered words
word_freqs = []  # (word, count) pairs; sorted most-frequent-first by sort()

################################
# procedures
################################


def read_file(path_to_file):
    """Append every character of *path_to_file* to the global ``data`` list."""
    global data
    with open(path_to_file, encoding='utf-8') as f:
        data = data + list(f.read())


def filter_chars_and_normalize():
    """Lower-case ``data``, split it into words, and drop stop words.

    Reads the stop-word list from ``stopwordfilepath`` (comma-separated,
    provided by cppy.cp_util) and extends the global ``words`` list.
    """
    global data
    global words
    # Replace non-alphanumeric characters with spaces so split() finds words.
    for i in range(len(data)):
        data[i] = ' ' if not data[i].isalnum() else data[i].lower()
    data_str = ''.join(data)
    words = words + data_str.split()
    with open(stopwordfilepath) as f:
        stop_words = set(f.read().split(','))
    # Single letters are treated as noise as well.
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]


def frequencies():
    """Record one ``(word, 1)`` occurrence pair per word into ``word_freqs``."""
    global words
    global word_freqs
    word_freqs.extend([(word, 1) for word in words])


def sort():
    """Aggregate the occurrence pairs in ``word_freqs`` into sorted counts.

    BUG FIX: the original recomputed counts directly from ``words`` with
    ``Counter(words)``, silently discarding the pairs accumulated by
    frequencies() and making that stage dead code. Aggregating the pairs
    yields the identical result when the pipeline runs in order, but
    sort() now actually consumes the output of the previous stage.
    """
    global word_freqs
    counts = Counter()
    for word, n in word_freqs:
        counts[word] += n
    word_freqs = counts.most_common()


if __name__ == "__main__":
    read_file(testfilepath)
    filter_chars_and_normalize()
    frequencies()
    sort()
    # Print the ten most frequent words.
    for tf in word_freqs[:10]:
        print(tf[0], '-', tf[1])