forked from p46318075/CodePattern
parent
254c11c3c9
commit
041fced368
@ -1,39 +1,21 @@
|
|||||||
import re, operator
|
|
||||||
from cppy.cp_util import *
|
from cppy.cp_util import *
|
||||||
|
|
||||||
# NOTE(review): this span is a side-by-side diff. In each row the line before
# the single `|` is one revision and the line after it is the other
# (presumably old first, then new -- confirm against the commit).
# First column: print_text(word_freqs, func) prints via print_word_freqs and
# then invokes its callback with None.
def print_text(word_freqs, func):
|
# Second column: readfile(path_to_file, func) loads data through read_file and
# forwards it to its callback together with `frequencies`.
def readfile(path_to_file, func):
|
||||||
print_word_freqs(word_freqs)
|
data = read_file(path_to_file)
|
||||||
# The callback receives None as its only argument.
func(None)
|
# The callback receives the loaded data and `frequencies` as the follow-up stage.
func(data, frequencies)
|
||||||
|
|
||||||
|
def extractwords(str_data, func):
    """Run ``extract_str_words`` on ``str_data`` and pass the result on.

    Args:
        str_data: The text handed to ``extract_str_words``.
        func: Callback invoked as ``func(words, sort)``, where ``words`` is
            whatever ``extract_str_words(str_data)`` returned.

    Returns:
        None. The result is delivered only through ``func``.
    """
    words = extract_str_words(str_data)
    func(words, sort)
|
||||||
|
|
||||||
# NOTE(review): side-by-side diff rows; the line before the single `|` is one
# revision, the line after it is the other. The signature and the
# get_frequencies call are the same in both columns.
def frequencies(word_list, func):
|
def frequencies(word_list, func):
|
||||||
# wf holds whatever get_frequencies returns for word_list.
wf = get_frequencies(word_list)
|
wf = get_frequencies(word_list)
|
||||||
# First column: the callback gets wf with print_text as the follow-up stage.
func(wf, print_text)
|
# Second column: the callback gets wf with printall as the follow-up stage.
func(wf, printall)
|
||||||
|
|
||||||
def scan(str_data, func):
    """Split ``str_data`` on whitespace and pass the tokens to ``func``.

    Args:
        str_data: The string to split with ``str.split()``.
        func: Callback invoked as ``func(tokens, frequencies)``.

    Returns:
        None. The token list is delivered only through ``func``.
    """
    tokens = str_data.split()
    func(tokens, frequencies)
|
|
||||||
|
|
||||||
def filter_chars(str_data, func):
    """Replace runs of non-word characters and underscores with one space.

    Args:
        str_data: The text to clean.
        func: Callback invoked as ``func(cleaned, scan)``, where ``cleaned``
            is ``str_data`` after the substitution.

    Returns:
        None. The cleaned text is delivered only through ``func``.
    """
    # Raw string: in a plain literal ``\W`` is an invalid escape sequence,
    # which newer Python versions warn about. The pattern text is unchanged.
    pattern = re.compile(r'[\W_]+')
    func(pattern.sub(' ', str_data), scan)
|
|
||||||
|
|
||||||
def remove_stop_words(word_list, func):
    """Drop every word found in ``get_stopwords()`` and pass the rest on.

    Args:
        word_list: Iterable of words to filter. Order is preserved.
        func: Callback invoked as ``func(kept, sort)``, where ``kept`` is the
            list of words not contained in the ``get_stopwords()`` result.

    Returns:
        None. The filtered list is delivered only through ``func``.
    """
    excluded = get_stopwords()
    kept = [word for word in word_list if word not in excluded]
    func(kept, sort)
|
|
||||||
|
|
||||||
# NOTE(review): side-by-side diff rows; the line before the single `|` is one
# revision, the line after it is the other. Both columns share the signature.
def sort(wf, func):
|
def sort(wf, func):
|
||||||
# First column: sorts wf.items() by the second element of each pair, in
# descending order, and passes the list to the callback with no_op as follow-up.
func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
|
# Second column: delegates ordering to sort_dict and passes None as follow-up.
func(sort_dict(wf), None)
|
||||||
|
|
||||||
def no_op(func):
    """Accept one argument, ignore it, and return ``None``.

    Args:
        func: Unused.
    """
    return None
|
|
||||||
|
|
||||||
def normalize(str_data, func):
    """Lower-case ``str_data`` and pass it to ``func``.

    Args:
        str_data: The string to convert with ``str.lower()``.
        func: Callback invoked as ``func(lowered, remove_stop_words)``.

    Returns:
        None. The lower-cased text is delivered only through ``func``.
    """
    lowered = str_data.lower()
    func(lowered, remove_stop_words)
|
|
||||||
|
|
||||||
def read_file(path_to_file, func):
    """Read a UTF-8 text file in full and pass its contents to ``func``.

    Args:
        path_to_file: Path of the file to open for reading.
        func: Callback invoked as ``func(contents, normalize)``.

    Returns:
        None. The file contents are delivered only through ``func``.
    """
    with open(path_to_file, encoding='utf-8') as source:
        contents = source.read()
    func(contents, normalize)
|
|
||||||
|
|
||||||
|
def printall(word_freqs, func):
    """Output ``word_freqs`` by calling ``print_word_freqs``.

    Args:
        word_freqs: The value forwarded unchanged to ``print_word_freqs``.
        func: Not used by this function.

    Returns:
        None.
    """
    print_word_freqs(word_freqs)
|
||||||
|
|
||||||
# Script entry point.
# NOTE(review): side-by-side diff rows; the line before the single `|` is one
# revision, the line after it is the other. The guard is the same in both.
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# First column: starts with read_file, passing filter_chars as the callback.
# testfilepath is not defined in this view -- presumably provided by the
# star import from cppy.cp_util; confirm.
read_file(testfilepath, filter_chars)
|
# Second column: starts with readfile, passing extractwords as the callback.
readfile(testfilepath, extractwords)
|
Loading…
Reference in new issue