patch

2 years ago · 041fced368
parent 254c11c3c9
commit 041fced368
4 changed files with 24 additions and 49 deletions
--- a/基本结构/021
+++ b/基本结构/021
@@ -2,8 +2,10 @@ import string
 from collections import Counter
 from cppy.cp_util import *

+################################
 # data
-data = []
+################################
+data = ''
 words = []
 word_freqs = []

@@ -13,17 +15,12 @@ word_freqs = []
 def read_file(path_to_file):
    global data
    with open(path_to_file,encoding='utf-8') as f:
-        data = data + list(f.read())
+        data = f.read()

-def filter_chars_and_normalize():    
+def extractwords():    
    global data
-    global words
-    for i in range(len(data)):                
-        data[i] = ' ' if not data[i].isalnum() else data[i].lower()        
-
-    data_str = ''.join(data)
-    words = words + data_str.split()
-
+    global words    
+    words = data.lower().split()
    with open(stopwordfilepath) as f:
        stop_words = set(f.read().split(','))    
    stop_words.update(string.ascii_lowercase)
@@ -41,7 +38,7 @@ def sort():

 if __name__ == "__main__":
    read_file( testfilepath )
-    filter_chars_and_normalize()    
+    extractwords()    
    frequencies()
    sort()

--- a/基本结构/021
+++ b/基本结构/021
@@ -2,31 +2,28 @@ import re
 from cppy.cp_util import *


-def filter_chars_and_normalize(str_data):
+def extractwords(str_data):
    pattern = re.compile('[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()    
    return [w for w in word_list if not w in stop_words]

-
 def frequencies(word_list):    
    word_freqs = {}  
    for word in word_list:  
        word_freqs[word] = word_freqs.get(word, 0) + 1    
    return word_freqs

-
 def sort(word_freq):    
    return sorted( word_freq.items(), key=lambda x: x[1], reverse=True )

-
-def print_all(word_freqs, n = 10 ):    
+def printall(word_freqs, n = 10 ):    
    for word, freq in word_freqs[ :n ]:
        print(word, '-', freq)        


 if __name__ == "__main__":
-    print_all(sort(frequencies(     
-            filter_chars_and_normalize(
+    printall(sort(frequencies(     
+            extractwords(
                read_file( testfilepath ))))
    )
--- a/基本结构/021
+++ b/基本结构/021
@@ -1,39 +1,21 @@
-import re, operator
 from cppy.cp_util import *

-def print_text(word_freqs, func):
-    print_word_freqs(word_freqs) 
-    func(None)
+def readfile(path_to_file, func):
+    data = read_file(path_to_file)
+    func(data, frequencies)
+
+def extractwords(str_data,func):       
+    func(extract_str_words(str_data), sort)

 def frequencies(word_list, func):
    wf = get_frequencies(word_list)    
-    func(wf, print_text)
-
-def scan(str_data, func):
-    func(str_data.split(), frequencies)
-
-def filter_chars(str_data, func):
-    pattern = re.compile('[\W_]+')
-    func(pattern.sub(' ', str_data), scan)
-
-def remove_stop_words(word_list, func):    
-    stop_words = get_stopwords()  
-    func([w for w in word_list if not w in stop_words], sort)
+    func(wf, printall)

 def sort(wf, func):
-    func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
-
-def no_op(func):
-    return
-
-def normalize(str_data, func):
-    func(str_data.lower(), remove_stop_words)
-
-def read_file(path_to_file, func):
-    with open(path_to_file,encoding='utf-8') as f:
-        data = f.read()
-    func(data, normalize)
+    func(sort_dict(wf), None)

+def printall(word_freqs, func):
+    print_word_freqs(word_freqs) 

 if __name__ == "__main__":
-    read_file(testfilepath, filter_chars)
+    readfile(testfilepath, extractwords)
--- a/基本结构/状态机/81A.py
+++ b/基本结构/状态机/81A.py
@@ -56,5 +56,4 @@ state_machine = WordFrequencyStateMachine(util.testfilepath)
 word_frequencies = state_machine.run()

 # 打印结果
-for word, freq in word_frequencies.most_common(10):
-    print(f"{word}: {freq}")
+util.print_word_freqs(word_frequencies.most_common(10))