forked from p46318075/CodePattern
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
9 lines
257 B
9 lines
257 B
9 months ago
|
import sys, re, string
|
||
|
from cppy.cp_util import *
|
||
|
|
||
|
def extract_words(path_to_file):
    """Return the words found in a text file, with stop words removed.

    The file is decoded as UTF-8 and lowercased. Every run of two or more
    ASCII letters (a-z) counts as a word, so digits, punctuation and
    single letters never appear in the result. Words are returned in
    order of appearance, duplicates included.

    Args:
        path_to_file: Path of the text file to read.

    Returns:
        A list of lowercase words that are not in the collection
        returned by ``get_stopwords()``.
    """
    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(path_to_file, encoding='utf-8') as source:
        text = source.read().lower()

    words = re.findall(r'[a-z]{2,}', text)

    # NOTE(review): get_stopwords is not defined in this file; presumably
    # it is supplied by the star import from cppy.cp_util -- confirm.
    stopwords = get_stopwords()
    return [w for w in words if w not in stopwords]
|
||
|
|