parent
254c11c3c9
commit
041fced368
@ -1,39 +1,21 @@
|
||||
import re, operator
|
||||
from cppy.cp_util import *
|
||||
|
||||
def print_text(word_freqs, func):
    """Output the word frequencies, then invoke the continuation.

    Args:
        word_freqs: Passed unchanged to ``print_word_freqs`` (not defined
            in this file; presumably provided by ``cppy.cp_util``).
        func: Continuation; called with the single argument ``None``.
    """
    print_word_freqs(word_freqs)
    func(None)
|
||||
def readfile(path_to_file, func):
    """Load the file's contents and pass them to the continuation.

    Args:
        path_to_file: Path handed to ``read_file``.
        func: Continuation; called as ``func(data, frequencies)`` so that
            ``frequencies`` becomes the step after it.
    """
    # NOTE(review): read_file is called with a single argument here; this
    # presumably targets a one-argument helper from cppy.cp_util rather than
    # a two-parameter continuation-style read_file -- confirm.
    data = read_file(path_to_file)
    func(data, frequencies)
|
||||
|
||||
def extractwords(str_data, func):
    """Extract words from the text and pass them to the continuation.

    Args:
        str_data: Text handed to ``extract_str_words`` (not defined in this
            file; presumably provided by ``cppy.cp_util``).
        func: Continuation; called as ``func(words, sort)`` so that ``sort``
            becomes the step after it.
    """
    func(extract_str_words(str_data), sort)
|
||||
|
||||
def frequencies(word_list, func):
    """Compute word frequencies and pass them to the continuation.

    Args:
        word_list: Words handed to ``get_frequencies`` (not defined in this
            file; presumably provided by ``cppy.cp_util``).
        func: Continuation; called as ``func(wf, print_text)`` so that
            ``print_text`` becomes the step after it.
    """
    wf = get_frequencies(word_list)
    func(wf, print_text)
|
||||
|
||||
def scan(str_data, func):
    """Split the text on whitespace and pass the pieces to the continuation.

    Args:
        str_data: String to split; ``str.split()`` with no separator splits
            on runs of whitespace.
        func: Continuation; called as ``func(words, frequencies)`` so that
            ``frequencies`` becomes the step after it.
    """
    func(str_data.split(), frequencies)
|
||||
|
||||
def filter_chars(str_data, func):
    """Replace non-word characters with spaces and call the continuation.

    Every run of characters matching ``[\\W_]+`` (anything that is not a
    regex word character, plus the underscore) is replaced by one space.

    Args:
        str_data: Text to clean.
        func: Continuation; called as ``func(cleaned, scan)`` so that
            ``scan`` becomes the step after it.
    """
    # Raw string: "\W" is not a valid string escape, so the non-raw literal
    # triggers an invalid-escape warning on recent Python versions.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
|
||||
|
||||
def remove_stop_words(word_list, func):
    """Drop stop words from the list and pass the rest to the continuation.

    Args:
        word_list: Iterable of words to filter.
        func: Continuation; called as ``func(kept_words, sort)``.
    """
    # get_stopwords is not defined in this file; presumably it comes from
    # cppy.cp_util and returns a container supporting ``in``.
    stop_words = get_stopwords()
    func([w for w in word_list if w not in stop_words], sort)
|
||||
func(wf, printall)
|
||||
|
||||
def sort(wf, func):
    """Order the frequency pairs by count, descending, and continue.

    Args:
        wf: Mapping whose ``items()`` yields pairs; element 1 of each pair
            is the sort key.
        func: Continuation; called as ``func(sorted_pairs, no_op)`` so that
            ``no_op`` ends the chain.
    """
    by_count_desc = sorted(wf.items(), key=operator.itemgetter(1), reverse=True)
    func(by_count_desc, no_op)
|
||||
|
||||
def no_op(func):
    """Do nothing; serves as the final continuation of the chain.

    Args:
        func: Ignored.
    """
    return
|
||||
|
||||
def normalize(str_data, func):
    """Lower-case the text and pass it to the continuation.

    Args:
        str_data: String to lower-case.
        func: Continuation; called as ``func(lowered, remove_stop_words)`` so
            that ``remove_stop_words`` becomes the step after it.
    """
    func(str_data.lower(), remove_stop_words)
|
||||
|
||||
def read_file(path_to_file, func):
    """Read a UTF-8 text file and pass its contents to the continuation.

    Args:
        path_to_file: Path of the file to open for reading.
        func: Continuation; called as ``func(data, normalize)`` so that
            ``normalize`` becomes the step after it.
    """
    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()
    # The continuation runs after the file has been closed.
    func(data, normalize)
|
||||
func(sort_dict(wf), None)
|
||||
|
||||
def printall(word_freqs, func):
    """Output the word frequencies; final step, the continuation is unused.

    Args:
        word_freqs: Passed unchanged to ``print_word_freqs`` (not defined
            in this file; presumably provided by ``cppy.cp_util``).
        func: Accepted for signature compatibility with the other steps;
            never called.
    """
    print_word_freqs(word_freqs)
|
||||
|
||||
if __name__ == "__main__":
    # testfilepath is not defined in this file; presumably it comes from the
    # cppy.cp_util star import.
    # NOTE(review): these two entry calls look like the before/after sides of
    # a diff rather than two intended runs -- confirm which one should stay.
    read_file(testfilepath, filter_chars)
    readfile(testfilepath, extractwords)
|
Loading…
Reference in new issue