|
|
|
@ -3,9 +3,6 @@ from cppy.cp_util import *
|
|
|
|
|
def extract_words(obj, path_to_file):
    """Store the words read from *path_to_file* in ``obj['data']``.

    The words are whatever ``extract_file_words(path_to_file)`` returns.
    That helper is not defined in the visible code; presumably it comes
    from the ``cppy.cp_util`` star import -- confirm.

    Args:
        obj: Mapping that receives the result under the key ``'data'``.
        path_to_file: Passed unchanged to ``extract_file_words``.
    """
    words = extract_file_words(path_to_file)
    obj['data'] = words
|
|
|
|
|
|
|
|
|
|
def load_stop_words(obj):
    """Store the stop-word collection in ``obj['stop_words']``.

    The value is whatever ``get_stopwords()`` returns. That helper is not
    defined in the visible code; presumably it comes from the
    ``cppy.cp_util`` star import -- confirm.

    Args:
        obj: Mapping that receives the result under the key ``'stop_words'``.
    """
    stop_words = get_stopwords()
    obj['stop_words'] = stop_words
|
|
|
|
|
|
|
|
|
|
def increment_count(obj, w):
    """Increase the counter for *w* in ``obj['freqs']`` by one.

    A word that has not been seen yet starts at 1.

    Args:
        obj: Mapping whose ``'freqs'`` entry is a dict of word -> count.
        w: The word (any hashable key) to count.
    """
    freqs = obj['freqs']
    # dict.get folds the "missing -> 0" case into one lookup instead of a
    # membership test followed by a second read.
    freqs[w] = freqs.get(w, 0) + 1
|
|
|
|
|
|
|
|
|
@ -15,12 +12,6 @@ data_storage_obj = {
|
|
|
|
|
'words' : lambda : data_storage_obj['data']
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Dict-based "object" holding the stop words together with the operations
# on them. The lambdas look up ``stop_words_obj`` by name when they are
# called, so they always see the current contents of the dict.
stop_words_obj = {
    # Starts empty; 'init' fills it in via load_stop_words().
    'stop_words': [],
    # Populate 'stop_words' in place.
    'init': lambda: load_stop_words(stop_words_obj),
    # True when ``word`` is contained in the current 'stop_words' value.
    'is_stop_word': lambda word: word in stop_words_obj['stop_words'],
}
|
|
|
|
|
|
|
|
|
|
word_freqs_obj = {
|
|
|
|
|
'freqs' : {},
|
|
|
|
|
'increment_count' : lambda w : increment_count(word_freqs_obj, w),
|
|
|
|
@ -30,12 +21,9 @@ word_freqs_obj = {
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load the input, count the words, report the result.
    # ``testfilepath`` is not defined in the visible code; presumably it is
    # provided by the ``cppy.cp_util`` star import -- confirm.
    data_storage_obj['init'](testfilepath)
    stop_words_obj['init']()

    # First pass: count every word that is not a stop word.
    for candidate in data_storage_obj['words']():
        if stop_words_obj['is_stop_word'](candidate):
            continue
        word_freqs_obj['increment_count'](candidate)

    # NOTE(review): this second pass counts every word again, this time
    # without the stop-word filter. It looks like a leftover from an edit;
    # confirm which of the two loops is intended before relying on the counts.
    for token in data_storage_obj['words']():
        word_freqs_obj['increment_count'](token)

    word_freqs = word_freqs_obj['sorted']()

    # Print the first ten (word, count) pairs.
    for term, count in word_freqs[:10]:
        print(term, '-', count)

    # NOTE(review): print_word_freqs is defined elsewhere; it presumably
    # prints the frequencies as well, which would duplicate the loop above
    # -- confirm.
    print_word_freqs(word_freqs)
|