import string
from collections import Counter
from cppy.cp_util import *

################################
# data
################################
# Raw text of the input file; populated by read_file().
data = ""
# Lower-cased tokens left after stop-word filtering; populated by extractwords().
words = []
# List of (word, count) tuples; populated by frequencies() and sort().
word_freqs = []


################################
# procedures
################################
def read_file(path_to_file):
    """Read the whole file at ``path_to_file`` as UTF-8 into the global ``data``."""
    global data
    with open(path_to_file, encoding='utf-8') as source:
        contents = source.read()
    data = contents


def extractwords():
    """Tokenize the global ``data`` and store the kept tokens in the global ``words``.

    The text is lower-cased and split on whitespace. A token is dropped when
    it appears in the comma-separated stop-word file at ``stopwordfilepath``
    or when it is a single ASCII lowercase letter.

    Raises:
        OSError: if the stop-word file cannot be opened.
    """
    global words
    # Decode explicitly as UTF-8 (as read_file does) rather than relying on
    # the platform's default encoding.
    with open(stopwordfilepath, encoding='utf-8') as f:
        raw_entries = f.read().split(',')
    # Strip surrounding whitespace so a trailing newline at the end of the
    # file, or a space after a comma, does not prevent an entry from matching.
    stop_words = {entry.strip() for entry in raw_entries}
    stop_words.update(string.ascii_lowercase)
    words = [word for word in data.lower().split() if word not in stop_words]


def frequencies():
    """Count each distinct word in the global ``words`` into the global ``word_freqs``.

    ``word_freqs`` is replaced by a list of ``(word, count)`` tuples, one per
    distinct word, in order of first occurrence (not sorted by count).
    """
    global word_freqs
    # Rebind instead of extending so that repeated calls do not accumulate
    # duplicate entries from earlier runs.
    word_freqs = list(Counter(words).items())


def sort():
    """Rank the words by frequency, most frequent first.

    Tallies the global ``words`` and stores the resulting ``(word, count)``
    tuples in the global ``word_freqs`` in descending order of count; words
    with equal counts keep their first-occurrence order.
    """
    global word_freqs
    tally = Counter(words)
    word_freqs = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)


if __name__ == "__main__":
    # Pipeline: load the text, tokenize and filter it, count, then rank.
    read_file(testfilepath)
    extractwords()
    frequencies()
    sort()

    # Print the first ten ranked entries as "word - count".
    for word, count in word_freqs[:10]:
        print(word, '-', count)