patch

1 year ago · 041fced368
parent 254c11c3c9
commit 041fced368
4 changed files with 24 additions and 49 deletions
--- a/基本结构/021
+++ b/基本结构/021
@@ -2,8 +2,10 @@ import string
 from collections import Counter
 from cppy.cp_util import *
 ################################
 # data
-data = []
+################################
 data = ''
 words = []
 word_freqs = []
@@ -13,17 +15,12 @@ word_freqs = []
 def read_file(path_to_file):
    global data
    with open(path_to_file,encoding='utf-8') as f:
-        data = data + list(f.read())
+        data = f.read()
-def filter_chars_and_normalize():    
+def extractwords():    
    global data
    global words    
-    for i in range(len(data)):                
+    words = data.lower().split()
        data[i] = ' ' if not data[i].isalnum() else data[i].lower()        
    data_str = ''.join(data)
    words = words + data_str.split()
    with open(stopwordfilepath) as f:
        stop_words = set(f.read().split(','))    
    stop_words.update(string.ascii_lowercase)
@@ -41,7 +38,7 @@ def sort():
 if __name__ == "__main__":
    read_file( testfilepath )
-    filter_chars_and_normalize()    
+    extractwords()    
    frequencies()
    sort()
--- a/基本结构/021
+++ b/基本结构/021
@@ -2,31 +2,28 @@ import re
 from cppy.cp_util import *
-def filter_chars_and_normalize(str_data):
+def extractwords(str_data):
    pattern = re.compile('[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()    
    return [w for w in word_list if not w in stop_words]
 def frequencies(word_list):    
    word_freqs = {}  
    for word in word_list:  
        word_freqs[word] = word_freqs.get(word, 0) + 1    
    return word_freqs
 def sort(word_freq):    
    return sorted( word_freq.items(), key=lambda x: x[1], reverse=True )
-
+def printall(word_freqs, n = 10 ):    
 def print_all(word_freqs, n = 10 ):    
    for word, freq in word_freqs[ :n ]:
        print(word, '-', freq)        
 if __name__ == "__main__":
-    print_all(sort(frequencies(     
+    printall(sort(frequencies(     
-            filter_chars_and_normalize(
+            extractwords(
                read_file( testfilepath ))))
    )
--- a/基本结构/021
+++ b/基本结构/021
@@ -1,39 +1,21 @@
 import re, operator
 from cppy.cp_util import *
-def print_text(word_freqs, func):
+def readfile(path_to_file, func):
-    print_word_freqs(word_freqs) 
+    data = read_file(path_to_file)
-    func(None)
+    func(data, frequencies)
 def extractwords(str_data,func):       
    func(extract_str_words(str_data), sort)
 def frequencies(word_list, func):
    wf = get_frequencies(word_list)    
-    func(wf, print_text)
+    func(wf, printall)
 def scan(str_data, func):
    func(str_data.split(), frequencies)
 def filter_chars(str_data, func):
    pattern = re.compile('[\W_]+')
    func(pattern.sub(' ', str_data), scan)
 def remove_stop_words(word_list, func):    
    stop_words = get_stopwords()  
    func([w for w in word_list if not w in stop_words], sort)
 def sort(wf, func):
-    func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
+    func(sort_dict(wf), None)
 def no_op(func):
    return
 def normalize(str_data, func):
    func(str_data.lower(), remove_stop_words)
 def read_file(path_to_file, func):
    with open(path_to_file,encoding='utf-8') as f:
        data = f.read()
    func(data, normalize)
 def printall(word_freqs, func):
    print_word_freqs(word_freqs) 
 if __name__ == "__main__":
-    read_file(testfilepath, filter_chars)
+    readfile(testfilepath, extractwords)
--- a/基本结构/状态机/81A.py
+++ b/基本结构/状态机/81A.py
@@ -56,5 +56,4 @@ state_machine = WordFrequencyStateMachine(util.testfilepath)
 word_frequencies = state_machine.run()
 # 打印结果
-for word, freq in word_frequencies.most_common(10):
+util.print_word_freqs(word_frequencies.most_common(10))
    print(f"{word}: {freq}")