You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
import sys, re, string
|
|
from cppy.cp_util import *
|
|
|
|
def extract_words(path_to_file):
    """Return the words of a text file with the stop words filtered out.

    The file is read as UTF-8 and lower-cased; every run of two or more
    ASCII letters (``[a-z]{2,}``) counts as a word. Words for which
    ``w in get_stopwords()`` holds are dropped.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of the remaining words, in the order they occur in the file.
    """
    # Read inside a context manager so the file handle is closed promptly,
    # even if decoding the content raises.
    with open(path_to_file, encoding='utf-8') as text_file:
        text = text_file.read().lower()

    words = re.findall(r'[a-z]{2,}', text)
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]
|
|
|