You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

9 lines
257 B

9 months ago
import sys, re, string
from cppy.cp_util import *
def extract_words(path_to_file):
    """Return the lowercase words of a text file that are not stop words.

    The file is decoded as UTF-8 and lowercased; every run of two or more
    ASCII letters (a-z) counts as a word. Words found in the collection
    returned by ``get_stopwords()`` are dropped.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of the remaining words in order of appearance, with
        duplicates preserved.
    """
    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read().lower()
    words = re.findall('[a-z]{2,}', text)
    # Build a set once so each membership test below is O(1) whatever
    # container get_stopwords() returns.
    # NOTE(review): assumes get_stopwords() yields hashable strings - confirm.
    stopwords = set(get_stopwords())
    return [w for w in words if w not in stopwords]