From c8946209bf1803e5418d0749ff8e032dc88d0bc0 Mon Sep 17 00:00:00 2001 From: zj3D Date: Fri, 22 Mar 2024 08:56:27 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=A7=E4=BF=AE=2010?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{A01.py => 1 最基础的写法.py} | 38 +++++++++++-------- ...言特性.py => 2 加入语言特性.py} | 0 .../{A03 Hacker.py => 3 Hacker.py} | 0 ...进程.py => 2 服务进程_不合并.py} | 0 ...程_分包.py => 2 服务进程_分包.py} | 0 .../2 去中心化开始.py | 2 +- 15 工程化/2 对象接口/tf-14B.py | 2 +- 15 工程化/3 类型申明/24A.py | 10 ++--- .../异常/2 时间停止在那一刻.py | 10 ++--- 16 其它/异常/3 所有错误的应对.py | 12 +++--- 16 其它/状态机/81B.py | 33 ---------------- 11 files changed, 40 insertions(+), 67 deletions(-) rename 10 一盘大棋/{A01.py => 1 最基础的写法.py} (55%) rename 10 一盘大棋/{A02 加入语言特性.py => 2 加入语言特性.py} (100%) rename 10 一盘大棋/{A03 Hacker.py => 3 Hacker.py} (100%) rename 13 多计算单元/数据共享/{2 服务进程.py => 2 服务进程_不合并.py} (100%) rename 13 多计算单元/数据共享/{3 服务进程_分包.py => 2 服务进程_分包.py} (100%) delete mode 100644 16 其它/状态机/81B.py diff --git a/10 一盘大棋/A01.py b/10 一盘大棋/1 最基础的写法.py similarity index 55% rename from 10 一盘大棋/A01.py rename to 10 一盘大棋/1 最基础的写法.py index 18f4d69..a0c4ab6 100644 --- a/10 一盘大棋/A01.py +++ b/10 一盘大棋/1 最基础的写法.py @@ -1,17 +1,20 @@ -import string -from cppy.cp_util import stopwordfilepath,testfilepath +# 引入停用词表和测试文件的路径 +from cppy.cp_util import stopwordfilepath, testfilepath -# 准备词和停用词表 -word_freqs = [] -with open( stopwordfilepath,encoding='utf-8' ) as f: +# 准备停用词表 +with open(stopwordfilepath, encoding='utf-8') as f: stop_words = f.read().split(',') -stop_words.extend(list(string.ascii_lowercase)) +for letter in 'abcdefghijklmnopqrstuvwxyz': + stop_words.append(letter) + -for line in open( testfilepath ,encoding='utf-8' ): +# 读文件,逐行扫描文本,发现词,确定不是停用词,计数 +word_freqs = [] +for line in open( testfilepath, encoding='utf-8' ): start_char = None i = 0 for c in line: - if start_char == None: + if start_char is None: if c.isalnum(): # 一个单词开始 start_char = i @@ -32,15 +35,18 @@ for line in open( testfilepath ,encoding='utf-8' ): pair_index += 1 if not found: word_freqs.append([word, 1]) - elif len(word_freqs) > 1: - for n in reversed(range(pair_index)): - if word_freqs[pair_index][1] > word_freqs[n][1]: - # 交换 - word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n] - pair_index = n # 重置开始标记 start_char = None i += 1 -for tf in word_freqs[0:10]: - print(tf[0], '-', tf[1]) \ No newline at end of file +# 使用冒泡排序对词频进行排序 +n = len(word_freqs) +for i in range(n): + for j in range(0, n-i-1): + if word_freqs[j][1] < word_freqs[j+1][1]: + word_freqs[j], word_freqs[j+1] = word_freqs[j+1], word_freqs[j] + + +# 打印频率最高的前10个词 +for tf in word_freqs[:10]: + print(tf[0], '-', tf[1]) diff --git a/10 一盘大棋/A02 加入语言特性.py b/10 一盘大棋/2 加入语言特性.py similarity index 100% rename from 10 一盘大棋/A02 加入语言特性.py rename to 10 一盘大棋/2 加入语言特性.py diff --git a/10 一盘大棋/A03 Hacker.py b/10 一盘大棋/3 Hacker.py similarity index 100% rename from 10 一盘大棋/A03 Hacker.py rename to 10 一盘大棋/3 Hacker.py diff --git a/13 多计算单元/数据共享/2 服务进程.py b/13 多计算单元/数据共享/2 服务进程_不合并.py similarity index 100% rename from 13 多计算单元/数据共享/2 服务进程.py rename to 13 多计算单元/数据共享/2 服务进程_不合并.py diff --git a/13 多计算单元/数据共享/3 服务进程_分包.py b/13 多计算单元/数据共享/2 服务进程_分包.py similarity index 100% rename from 13 多计算单元/数据共享/3 服务进程_分包.py rename to 13 多计算单元/数据共享/2 服务进程_分包.py diff --git a/15 工程化/1 松耦合/1 消息驱动的组件/2 去中心化开始.py b/15 工程化/1 松耦合/1 消息驱动的组件/2 去中心化开始.py index 0f4a20e..f0e448b 100644 --- a/15 工程化/1 松耦合/1 消息驱动的组件/2 去中心化开始.py +++ b/15 工程化/1 松耦合/1 消息驱动的组件/2 去中心化开始.py @@ -1,7 +1,7 @@ from cppy.cp_util import * # 这个例子没有实际意义,是用来帮助理解下一个例子 -# 程序,只需要做第一件事情,后面的顺序逻辑写到各个函数里面了 +# 主程序只需要做第一件事情,后面的顺序逻辑写到各个函数里面了 def readfile(path_to_file, func): data = read_file(path_to_file) diff --git a/15 工程化/2 对象接口/tf-14B.py b/15 工程化/2 对象接口/tf-14B.py index dfba31f..42e6c94 100644 --- a/15 工程化/2 对象接口/tf-14B.py +++ b/15 工程化/2 对象接口/tf-14B.py @@ -27,7 +27,7 @@ class IWordFrequencyCounter(metaclass=abc.ABCMeta): # 类实现 # class DataStorageManager1: - def __init__(self, path_to_file): + def __init__(self, path_to_file): self._data = read_file(path_to_file) self._data = re_split(self._data) diff --git a/15 工程化/3 类型申明/24A.py b/15 工程化/3 类型申明/24A.py index 1c060bb..5e86529 100644 --- a/15 工程化/3 类型申明/24A.py +++ b/15 工程化/3 类型申明/24A.py @@ -1,7 +1,7 @@ from collections import Counter from cppy.cp_util import * -class AcceptTypes: +class TypesCheck: def __init__(self, *args): self._args = args @@ -9,19 +9,19 @@ class AcceptTypes: def wrapped_f(*args, **kwargs): for i, arg_type in enumerate(self._args): if not isinstance(args[i], arg_type): - raise TypeError(f"Argument {i} expected {arg_type}, got {type(args[i])}") + raise TypeError(f" {i} expected {arg_type}, got {type(args[i])}") return f(*args, **kwargs) return wrapped_f -@AcceptTypes(str) +@TypesCheck(str) def extract_words_(path_to_file): return extract_file_words(path_to_file) -@AcceptTypes(list) +@TypesCheck(list) def frequencies_(word_list): return Counter(word_list) -@AcceptTypes(Counter) +@TypesCheck(Counter) def sort_(word_freq): return word_freq.most_common() diff --git a/16 其它/异常/2 时间停止在那一刻.py b/16 其它/异常/2 时间停止在那一刻.py index d30a72b..0dd2fa6 100644 --- a/16 其它/异常/2 时间停止在那一刻.py +++ b/16 其它/异常/2 时间停止在那一刻.py @@ -3,25 +3,25 @@ from cppy.cp_util import * def extract_words(path_to_file): - assert(type(path_to_file) is str), "I need a string!" - assert(path_to_file), "I need a non-empty string!" + assert(type(path_to_file) is str), "Must be a string!" + assert(path_to_file), "Must be a non-empty string!" try: with open(path_to_file,encoding='utf-8') as f: str_data = f.read() except IOError as e: - print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror)) + print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror)) raise e return re_split(str_data) def remove_stop_words(word_list): - assert(type(word_list) is list), "I need a list!" + assert(type(word_list) is list), "Must be a list!" try: stop_words = get_stopwords() except IOError as e: - print("I/O error({0}) opening stops_words.txt: {1}! I quit!".format(e.errno, e.strerror)) + print("I/O error({0}) opening stops_words.txt: {1}".format(e.errno, e.strerror)) raise e return [w for w in word_list if not w in stop_words] diff --git a/16 其它/异常/3 所有错误的应对.py b/16 其它/异常/3 所有错误的应对.py index e8ee31d..5c1d8e7 100644 --- a/16 其它/异常/3 所有错误的应对.py +++ b/16 其它/异常/3 所有错误的应对.py @@ -2,18 +2,18 @@ from cppy.cp_util import * def extractWords(path_to_file): - assert(type(path_to_file) is str), "I need a string! I quit!" - assert(path_to_file), "I need a non-empty string! I quit!" + assert(type(path_to_file) is str), "Must be a string" + assert(path_to_file), "Must be a non-empty string" return extract_file_words(path_to_file) def frequencies(word_list): - assert(type(word_list) is list), "I need a list! I quit!" - assert(word_list != []), "I need a non-empty list! I quit!" + assert(type(word_list) is list), "Must be a list" + assert(word_list != []), "Must be a non-empty list" return get_frequencies(word_list) def sort(word_freqs): - assert(type(word_freqs) is dict), "I need a dictionary! I quit!" - assert(word_freqs != {}), "I need a non-empty dictionary! I quit!" + assert(type(word_freqs) is dict), "Must be a dictionary" + assert(word_freqs != {}), "Must be a non-empty dictionary" return sort_dict(word_freqs) diff --git a/16 其它/状态机/81B.py b/16 其它/状态机/81B.py deleted file mode 100644 index 413185e..0000000 --- a/16 其它/状态机/81B.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -import cppy.cp_util as util - -# 每一列是一个数据元素和一个公式,第一列是输入数据,所以没有公式 -all_words = [(), None] -non_stop_words = [(), util.extract_str_words] -frequencies = [(), util.get_frequencies] -sorted_data = [(), util.sort_dict] - -# 整个电子表格 -all_columns = [all_words, non_stop_words,\ - frequencies, sorted_data] - -# 每次输入数据后调用此方法 -def update(): - global all_columns - for c in all_columns[1::]: - if c[1] == util.extract_str_words: - c[0] = c[1](all_words[0]) - elif c[1] == util.get_frequencies: - c[0] = c[1](non_stop_words[0]) - elif c[1] == util.sort_dict: - c[0] = c[1](frequencies[0]) - -# 将固定数据加载到第一列中 -all_words[0] = util.read_file(util.testfilepath) -# 调用update函数遍历列表 -update() - -#打印结果 -util.print_word_freqs(sorted_data[0]) - -