大修 10

2 years ago · c8946209bf
parent 2d46194636
commit c8946209bf
11 changed files with 40 additions and 67 deletions
--- a/最基础的写法.py
+++ b/最基础的写法.py
@@ -1,17 +1,20 @@
-import string
-from cppy.cp_util import stopwordfilepath,testfilepath
+# 引入停用词表和测试文件的路径
+from cppy.cp_util import stopwordfilepath, testfilepath

-# 准备词和停用词表
-word_freqs = []
-with open( stopwordfilepath,encoding='utf-8' ) as f:
+# 准备停用词表
+with open(stopwordfilepath, encoding='utf-8') as f:
    stop_words = f.read().split(',')
-stop_words.extend(list(string.ascii_lowercase))
+for letter in 'abcdefghijklmnopqrstuvwxyz':
+    stop_words.append(letter)
+

-for line in open( testfilepath ,encoding='utf-8' ):
+# 读文件，逐行扫描文本，发现词，确定不是停用词，计数
+word_freqs = []
+for line in open( testfilepath, encoding='utf-8' ):
    start_char = None
    i = 0
    for c in line:
-        if start_char == None:
+        if start_char is None:
            if c.isalnum():
                # 一个单词开始
                start_char = i
@@ -32,15 +35,18 @@ for line in open( testfilepath ,encoding='utf-8' ):
                        pair_index += 1
                    if not found:
                        word_freqs.append([word, 1])
-                    elif len(word_freqs) > 1:                        
-                        for n in reversed(range(pair_index)):
-                            if word_freqs[pair_index][1] > word_freqs[n][1]:
-                                # 交换
-                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
-                                pair_index = n
                # 重置开始标记
                start_char = None
        i += 1

-for tf in word_freqs[0:10]:
-    print(tf[0], '-', tf[1])
+# 使用冒泡排序对词频进行排序
+n = len(word_freqs)
+for i in range(n):
+    for j in range(0, n-i-1):
+        if word_freqs[j][1] < word_freqs[j+1][1]:
+            word_freqs[j], word_freqs[j+1] = word_freqs[j+1], word_freqs[j]            
+
+
+# 打印频率最高的前10个词
+for tf in word_freqs[:10]:
+    print(tf[0], '-', tf[1])
--- a/加入语言特性.py
+++ b/加入语言特性.py
--- a/一盘大棋/A03
+++ b/一盘大棋/A03
--- a/多计算单元/数据共享/2
+++ b/多计算单元/数据共享/2
--- a/多计算单元/数据共享/2
+++ b/多计算单元/数据共享/2
--- a/消息驱动的组件/2
+++ b/消息驱动的组件/2
@@ -1,7 +1,7 @@
 from cppy.cp_util import *

 # 这个例子没有实际意义，是用来帮助理解下一个例子
-# 程序，只需要做第一件事情，后面的顺序逻辑写到各个函数里面了
+# 主程序只需要做第一件事情，后面的顺序逻辑写到各个函数里面了

 def readfile(path_to_file, func):
    data = read_file(path_to_file)
--- a/对象接口/tf-14B.py
+++ b/对象接口/tf-14B.py
@@ -27,7 +27,7 @@ class IWordFrequencyCounter(metaclass=abc.ABCMeta):
 # 类实现
 #
 class DataStorageManager1:    
-    def __init__(self, path_to_file):
+    def __init__(self, path_to_file):        
        self._data = read_file(path_to_file)        
        self._data = re_split(self._data)

--- a/类型申明/24A.py
+++ b/类型申明/24A.py
@@ -1,7 +1,7 @@
 from collections import Counter
 from cppy.cp_util import *

-class AcceptTypes:
+class TypesCheck:
    def __init__(self, *args):
        self._args = args

@@ -9,19 +9,19 @@ class AcceptTypes:
        def wrapped_f(*args, **kwargs):
            for i, arg_type in enumerate(self._args):
                if not isinstance(args[i], arg_type):
-                    raise TypeError(f"Argument {i} expected {arg_type}, got {type(args[i])}")
+                    raise TypeError(f" {i} expected {arg_type}, got {type(args[i])}")
            return f(*args, **kwargs)
        return wrapped_f

-@AcceptTypes(str)
+@TypesCheck(str)
 def extract_words_(path_to_file):
    return extract_file_words(path_to_file)    

-@AcceptTypes(list)
+@TypesCheck(list)
 def frequencies_(word_list):
    return Counter(word_list)

-@AcceptTypes(Counter)
+@TypesCheck(Counter)
 def sort_(word_freq):
    return word_freq.most_common()

--- a/时间停止在那一刻.py
+++ b/时间停止在那一刻.py
@@ -3,25 +3,25 @@ from cppy.cp_util import *


 def extract_words(path_to_file):
-    assert(type(path_to_file) is str), "I need a string!" 
-    assert(path_to_file), "I need a non-empty string!" 
+    assert(type(path_to_file) is str), "Must be a string!" 
+    assert(path_to_file), "Must be a non-empty string!" 

    try:
        with open(path_to_file,encoding='utf-8') as f:
            str_data = f.read()
    except IOError as e:
-        print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror))
+        print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror))
        raise e    
    
    return re_split(str_data)

 def remove_stop_words(word_list):
-    assert(type(word_list) is list), "I need a list!"
+    assert(type(word_list) is list), "Must be a list!"

    try:
        stop_words = get_stopwords()
    except IOError as e:
-        print("I/O error({0}) opening stops_words.txt: {1}! I quit!".format(e.errno, e.strerror))
+        print("I/O error({0}) opening stops_words.txt: {1}".format(e.errno, e.strerror))
        raise e
            
    return [w for w in word_list if not w in stop_words]
--- a/所有错误的应对.py
+++ b/所有错误的应对.py
@@ -2,18 +2,18 @@ from cppy.cp_util import *


 def extractWords(path_to_file):
-    assert(type(path_to_file) is str), "I need a string! I quit!" 
-    assert(path_to_file), "I need a non-empty string! I quit!"         
+    assert(type(path_to_file) is str), "Must be a string" 
+    assert(path_to_file), "Must be a non-empty string"         
    return extract_file_words(path_to_file)

 def frequencies(word_list):
-    assert(type(word_list) is list), "I need a list! I quit!"
-    assert(word_list != []), "I need a non-empty list! I quit!"    
+    assert(type(word_list) is list), "Must be a list"
+    assert(word_list != []), "Must be a non-empty list"    
    return get_frequencies(word_list)

 def sort(word_freqs):
-    assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
-    assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
+    assert(type(word_freqs) is dict), "Must be a dictionary"
+    assert(word_freqs != {}), "Must be a non-empty dictionary"
    return sort_dict(word_freqs)


--- a/其它/状态机/81B.py
+++ b/其它/状态机/81B.py
@@ -1,33 +0,0 @@
-# -*- coding: utf-8 -*-
-import cppy.cp_util as util
-
-# 每一列是一个数据元素和一个公式,第一列是输入数据，所以没有公式
-all_words = [(), None]
-non_stop_words = [(), util.extract_str_words]
-frequencies = [(), util.get_frequencies]
-sorted_data = [(), util.sort_dict]
-
-# 整个电子表格
-all_columns = [all_words, non_stop_words,\
-                frequencies, sorted_data]
-
-# 每次输入数据后调用此方法
-def update():
-    global all_columns
-    for c in all_columns[1::]:
-        if c[1] == util.extract_str_words:
-            c[0] = c[1](all_words[0])
-        elif c[1] == util.get_frequencies:
-            c[0] = c[1](non_stop_words[0])
-        elif c[1] == util.sort_dict:
-            c[0] = c[1](frequencies[0])
-
-# 将固定数据加载到第一列中
-all_words[0] = util.read_file(util.testfilepath)
-# 调用update函数遍历列表
-update()
-
-#打印结果
-util.print_word_freqs(sorted_data[0])
-
-