import sys, re, string
from cppy.cp_util import *

def extract_words(path_to_file):
    """Return the non-stopword words found in a UTF-8 text file.

    The file content is lower-cased, and every run of two or more ASCII
    letters (``a``-``z``) counts as a word. Words for which
    ``w in get_stopwords()`` is true are dropped; the remaining words are
    returned in the order they occur, duplicates included.

    Args:
        path_to_file: Path of the text file to read; passed straight to
            ``open`` and decoded as UTF-8.

    Returns:
        A list of lower-case word strings.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Use a context manager so the handle is closed deterministically,
    # rather than whenever the garbage collector gets to it.
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read().lower()

    words = re.findall(r'[a-z]{2,}', text)

    # NOTE(review): get_stopwords is not defined in this file; presumably it
    # comes from the star import of cppy.cp_util. Its return type is not
    # visible here, so membership is tested against the object as returned.
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]