大修 10

1 year ago · c8946209bf
parent 2d46194636
commit c8946209bf
11 changed files with 40 additions and 67 deletions
--- a/最基础的写法.py
+++ b/最基础的写法.py
@ -1,17 +1,20 @@
-import string
+# 引入停用词表和测试文件的路径
-from cppy.cp_util import stopwordfilepath,testfilepath
+from cppy.cp_util import stopwordfilepath, testfilepath
-# 准备词和停用词表
+# 准备停用词表
-word_freqs = []
+with open(stopwordfilepath, encoding='utf-8') as f:
 with open( stopwordfilepath,encoding='utf-8' ) as f:
    stop_words = f.read().split(',')
-stop_words.extend(list(string.ascii_lowercase))
+for letter in 'abcdefghijklmnopqrstuvwxyz':
    stop_words.append(letter)
-for line in open( testfilepath ,encoding='utf-8' ):
+# 读文件，逐行扫描文本，发现词，确定不是停用词，计数
 word_freqs = []
 for line in open( testfilepath, encoding='utf-8' ):
    start_char = None
    i = 0
    for c in line:
-        if start_char == None:
+        if start_char is None:
            if c.isalnum():
                # 一个单词开始
                start_char = i
@ -32,15 +35,18 @@ for line in open( testfilepath ,encoding='utf-8' ):
                        pair_index += 1
                    if not found:
                        word_freqs.append([word, 1])
                    elif len(word_freqs) > 1:                        
                        for n in reversed(range(pair_index)):
                            if word_freqs[pair_index][1] > word_freqs[n][1]:
                                # 交换
                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
                                pair_index = n
                # 重置开始标记
                start_char = None
        i += 1
-for tf in word_freqs[0:10]:
+# 使用冒泡排序对词频进行排序
-    print(tf[0], '-', tf[1])
+n = len(word_freqs)
 for i in range(n):
    for j in range(0, n-i-1):
        if word_freqs[j][1] < word_freqs[j+1][1]:
            word_freqs[j], word_freqs[j+1] = word_freqs[j+1], word_freqs[j]            
 # 打印频率最高的前10个词
 for tf in word_freqs[:10]:
    print(tf[0], '-', tf[1])
--- a/加入语言特性.py
+++ b/加入语言特性.py
--- a/一盘大棋/A03
+++ b/一盘大棋/A03
--- a/多计算单元/数据共享/2
+++ b/多计算单元/数据共享/2
--- a/多计算单元/数据共享/2
+++ b/多计算单元/数据共享/2
--- a/消息驱动的组件/2
+++ b/消息驱动的组件/2
@ -1,7 +1,7 @@
 from cppy.cp_util import *
 # 这个例子没有实际意义，是用来帮助理解下一个例子
-# 程序，只需要做第一件事情，后面的顺序逻辑写到各个函数里面了
+# 主程序只需要做第一件事情，后面的顺序逻辑写到各个函数里面了
 def readfile(path_to_file, func):
    data = read_file(path_to_file)
--- a/对象接口/tf-14B.py
+++ b/对象接口/tf-14B.py
@ -27,7 +27,7 @@ class IWordFrequencyCounter(metaclass=abc.ABCMeta):
 # 类实现
 #
 class DataStorageManager1:    
-    def __init__(self, path_to_file):
+    def __init__(self, path_to_file):        
        self._data = read_file(path_to_file)        
        self._data = re_split(self._data)
--- a/类型申明/24A.py
+++ b/类型申明/24A.py
@ -1,7 +1,7 @@
 from collections import Counter
 from cppy.cp_util import *
-class AcceptTypes:
+class TypesCheck:
    def __init__(self, *args):
        self._args = args
@ -9,19 +9,19 @@ class AcceptTypes:
        def wrapped_f(*args, **kwargs):
            for i, arg_type in enumerate(self._args):
                if not isinstance(args[i], arg_type):
-                    raise TypeError(f"Argument {i} expected {arg_type}, got {type(args[i])}")
+                    raise TypeError(f" {i} expected {arg_type}, got {type(args[i])}")
            return f(*args, **kwargs)
        return wrapped_f
-@AcceptTypes(str)
+@TypesCheck(str)
 def extract_words_(path_to_file):
    return extract_file_words(path_to_file)    
-@AcceptTypes(list)
+@TypesCheck(list)
 def frequencies_(word_list):
    return Counter(word_list)
-@AcceptTypes(Counter)
+@TypesCheck(Counter)
 def sort_(word_freq):
    return word_freq.most_common()
--- a/时间停止在那一刻.py
+++ b/时间停止在那一刻.py
@ -3,25 +3,25 @@ from cppy.cp_util import *
 def extract_words(path_to_file):
-    assert(type(path_to_file) is str), "I need a string!" 
+    assert(type(path_to_file) is str), "Must be a string!" 
-    assert(path_to_file), "I need a non-empty string!" 
+    assert(path_to_file), "Must be a non-empty string!" 
    try:
        with open(path_to_file,encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
-        print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror))
+        print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror))
        raise e    
    return re_split(str_data)
 def remove_stop_words(word_list):
-    assert(type(word_list) is list), "I need a list!"
+    assert(type(word_list) is list), "Must be a list!"
    try:
        stop_words = get_stopwords()
    except IOError as e:
-        print("I/O error({0}) opening stops_words.txt: {1}! I quit!".format(e.errno, e.strerror))
+        print("I/O error({0}) opening stops_words.txt: {1}".format(e.errno, e.strerror))
        raise e
    return [w for w in word_list if not w in stop_words]
--- a/所有错误的应对.py
+++ b/所有错误的应对.py
@ -2,18 +2,18 @@ from cppy.cp_util import *
 def extractWords(path_to_file):
-    assert(type(path_to_file) is str), "I need a string! I quit!" 
+    assert(type(path_to_file) is str), "Must be a string" 
-    assert(path_to_file), "I need a non-empty string! I quit!"         
+    assert(path_to_file), "Must be a non-empty string"         
    return extract_file_words(path_to_file)
 def frequencies(word_list):
-    assert(type(word_list) is list), "I need a list! I quit!"
+    assert(type(word_list) is list), "Must be a list"
-    assert(word_list != []), "I need a non-empty list! I quit!"    
+    assert(word_list != []), "Must be a non-empty list"    
    return get_frequencies(word_list)
 def sort(word_freqs):
-    assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
+    assert(type(word_freqs) is dict), "Must be a dictionary"
-    assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
+    assert(word_freqs != {}), "Must be a non-empty dictionary"
    return sort_dict(word_freqs)
--- a/其它/状态机/81B.py
+++ b/其它/状态机/81B.py
@ -1,33 +0,0 @@
 # -*- coding: utf-8 -*-
 import cppy.cp_util as util
 # Each column is a two-item list [data, formula]: slot 0 holds the column's
 # current data (an empty tuple until it is filled in) and slot 1 holds the
 # function that produces that data. The first column is the input data, so
 # it has no formula (None).
 all_words = [(), None]
 non_stop_words = [(), util.extract_str_words]
 frequencies = [(), util.get_frequencies]
 sorted_data = [(), util.sort_dict]
 # The whole spreadsheet: the columns listed in the order they are evaluated
 all_columns = [all_words, non_stop_words,\
                frequencies, sorted_data]
 # Call this function every time the input data has changed
 def update():
    """Recompute the data of every formula column.

    Iterates over ``all_columns`` skipping the first (input) column, applies
    each column's formula (slot 1) to the data of the column it depends on,
    and stores the result in the column's own data slot (slot 0). Takes no
    arguments and returns None; the column lists are modified in place.
    """
    # all_columns is only read here, never rebound
    global all_columns
    for c in all_columns[1::]:
        # The formula's identity selects which upstream column feeds it
        if c[1] == util.extract_str_words:
            c[0] = c[1](all_words[0])
        elif c[1] == util.get_frequencies:
            c[0] = c[1](non_stop_words[0])
        elif c[1] == util.sort_dict:
            c[0] = c[1](frequencies[0])
 # Load the fixed input data into the first column
 all_words[0] = util.read_file(util.testfilepath)
 # Call update() to walk the column list and recompute every formula column
 update()
 # Print the result held in the last column
 util.print_word_freqs(sorted_data[0])