You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

39 lines
966 B

9 months ago
import re, operator
from cppy.cp_util import *
def print_text(word_freqs, func):
    """Print the word frequencies, then call the continuation with ``None``.

    Args:
        word_freqs: Frequency data handed unchanged to ``print_word_freqs``
            (a helper not defined in this file; presumably supplied by the
            ``cppy.cp_util`` star import).
        func: Continuation invoked with the single argument ``None`` once
            printing has finished.
    """
    # Output first, then hand control forward; nothing is returned.
    print_word_freqs(word_freqs)
    func(None)
def frequencies(word_list, func):
    """Compute word frequencies and forward them to the continuation.

    Args:
        word_list: Words handed to ``get_frequencies`` (a helper not defined
            in this file; presumably supplied by the ``cppy.cp_util`` star
            import).
        func: Continuation called as ``func(result, print_text)``, where
            ``result`` is whatever ``get_frequencies`` returns.
    """
    # The next stage receives the counts plus the step that follows it.
    func(get_frequencies(word_list), print_text)
def scan(str_data, func):
    """Split the text on whitespace and forward the resulting word list.

    Args:
        str_data: Text to split with ``str.split()`` (no explicit separator).
        func: Continuation called as ``func(words, frequencies)``.
    """
    words = str_data.split()
    func(words, frequencies)
def filter_chars(str_data, func):
    """Replace runs of non-word characters and underscores with one space.

    Args:
        str_data: Text to clean.
        func: Continuation called as ``func(cleaned_text, scan)``.
    """
    # Raw string literal: in a plain string '\W' is an invalid escape
    # sequence (DeprecationWarning since Python 3.6, SyntaxWarning since
    # 3.12). The compiled pattern itself is unchanged.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
def remove_stop_words(word_list, func):
    """Drop every word found in the stop-word collection and pass on the rest.

    Args:
        word_list: Iterable of words to filter; order is preserved.
        func: Continuation called as ``func(kept_words, sort)``, where
            ``kept_words`` is a new list.
    """
    # ``get_stopwords`` is not defined in this file (presumably supplied by
    # the ``cppy.cp_util`` star import); it is called once, up front.
    excluded = get_stopwords()
    kept_words = []
    for word in word_list:
        if word not in excluded:
            kept_words.append(word)
    func(kept_words, sort)
def sort(wf, func):
    """Order the frequency pairs by descending count and forward them.

    Args:
        wf: Mapping-like object exposing ``.items()`` that yields pairs whose
            second element is the sort key.
        func: Continuation called as ``func(ranked_pairs, no_op)``, where
            ``ranked_pairs`` is a list of the items sorted on element 1,
            largest first.
    """
    ranked_pairs = list(wf.items())
    # In-place stable sort; equivalent to sorted(..., reverse=True).
    ranked_pairs.sort(key=operator.itemgetter(1), reverse=True)
    func(ranked_pairs, no_op)
def no_op(func):
    """Terminate the continuation chain: ignore the argument and do nothing.

    Args:
        func: Unused; accepted only so the signature matches the other steps.

    Returns:
        None.
    """
    return None
def normalize(str_data, func):
    """Lower-case the text and forward it to the continuation.

    Args:
        str_data: Text to convert with ``str.lower()``.
        func: Continuation called as ``func(lowered_text, remove_stop_words)``.
    """
    lowered_text = str_data.lower()
    func(lowered_text, remove_stop_words)
def read_file(path_to_file, func):
    """Read a whole UTF-8 text file and forward its contents.

    Args:
        path_to_file: Path passed to ``open`` (text mode, UTF-8 encoding).
        func: Continuation called as ``func(contents, normalize)``.
    """
    with open(path_to_file, encoding='utf-8') as source:
        contents = source.read()
    # The file is closed before the rest of the chain runs.
    func(contents, normalize)
if __name__ == "__main__":
    # Start the chain: read the file, then hand its text to filter_chars.
    # ``testfilepath`` is not defined in this file (presumably supplied by
    # the ``cppy.cp_util`` star import).
    read_file(path_to_file=testfilepath, func=filter_chars)