From e2eab490655e60699bdbcc56b50caa7c0bef4ab4 Mon Sep 17 00:00:00 2001 From: Yao <1928814540@qq.com> Date: Fri, 13 Sep 2024 14:35:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BF=AE=E6=94=B9=E4=BA=86=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E4=BB=A5=E5=AE=9E=E7=8E=B0=E6=96=B0=E7=9A=84=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E6=88=96API=EF=BC=9B=20fix:=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=BA=86=E4=BB=A3=E7=A0=81=E4=B8=AD=E7=9A=84=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E9=94=99=E8=AF=AF=EF=BC=9B=20refactor:=20=E9=87=8D=E5=86=99/?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81=EF=BC=8C=E4=BD=86=E6=B2=A1?= =?UTF-8?q?=E6=9C=89=E6=94=B9=E5=8F=98=E4=BB=BB=E4=BD=95API=E8=A1=8C?= =?UTF-8?q?=E4=B8=BA=EF=BC=9B=20style:=20=E6=B7=BB=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC=E3=80=81=E6=A0=BC=E5=BC=8F=E5=8C=96=E3=80=81?= =?UTF-8?q?=E7=BC=BA=E5=A4=B1=E7=9A=84=E5=88=86=E5=8F=B7=E7=AD=89=EF=BC=9B?= =?UTF-8?q?=20test:=20=E6=B7=BB=E5=8A=A0=E4=BA=86=E7=BC=BA=E5=A4=B1?= =?UTF-8?q?=E7=9A=84=E6=B5=8B=E8=AF=95=E6=88=96=E4=BF=AE=E6=AD=A3=E4=BA=86?= =?UTF-8?q?=E7=8E=B0=E6=9C=89=E7=9A=84=E6=B5=8B=E8=AF=95=EF=BC=9B=20docs:?= =?UTF-8?q?=20=E6=9B=B4=E6=96=B0=E4=BA=86=E5=A6=82readme=E7=AD=89=E7=9A=84?= =?UTF-8?q?=E6=96=87=E6=A1=A3=EF=BC=9B=20build:=20=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E4=BA=86=E4=BE=9D=E8=B5=96=E9=A1=B9=E3=80=81=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E7=89=88=E6=9C=AC=EF=BC=9B=20ops:=20=E5=BD=B1=E5=93=8D?= =?UTF-8?q?=E4=BA=86=E6=93=8D=E4=BD=9C=E6=80=A7=E7=BB=84=E4=BB=B6=EF=BC=8C?= =?UTF-8?q?=E5=A6=82=E5=9F=BA=E7=A1=80=E8=AE=BE=E6=96=BD=E3=80=81=E9=83=A8?= =?UTF-8?q?=E7=BD=B2=E3=80=81=E5=A4=87=E4=BB=BD=E3=80=81=E6=81=A2=E5=A4=8D?= =?UTF-8?q?=EF=BC=9B=20chore:=20=E4=BF=AE=E6=94=B9=E4=BA=86.gitignore?= =?UTF-8?q?=E7=AD=89=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../11 基础结构/函数/1 全局变量.py | 28 ++++++++----- .../函数/2 临时变量传递.py | 31 ++++++++------ .../11 基础结构/函数/3 常见风格.py | 8 ++-- .../11 基础结构/函数/4 递归.py | 27 ++++++------ .../流式调用/1 嵌套调用.py | 2 +- .../流式调用/2 方法bind.py | 11 ++--- .../流式调用/3 重载管道.py | 19 +++++---- .../流式调用/4 类方法.py | 14 ++++--- .../流式调用/5 类方法.py | 41 +++++++++++-------- .../12 语言特性/1 类生成器/元类.py | 20 +++++---- 10 files changed, 117 insertions(+), 84 deletions(-) diff --git a/A 代码模式/11 基础结构/函数/1 全局变量.py b/A 代码模式/11 基础结构/函数/1 全局变量.py index aa37ebe..dbffaf0 100644 --- a/A 代码模式/11 基础结构/函数/1 全局变量.py +++ b/A 代码模式/11 基础结构/函数/1 全局变量.py @@ -9,38 +9,46 @@ data = '' words = [] word_freqs = [] + ################################ # procedures ################################ def read_file(path_to_file): + """读取文件内容,并赋值给全局变量data""" global data - with open(path_to_file,encoding='utf-8') as f: + with open(path_to_file, encoding='utf-8') as f: data = f.read() -def extractwords(): + +def extractwords(): + """提取data中的单词,并赋值给全局变量words""" global data - global words + global words words = data.lower().split() with open(stopwordfilepath) as f: - stop_words = set(f.read().split(',')) + stop_words = set(f.read().split(',')) stop_words.update(string.ascii_lowercase) words = [word for word in words if word not in stop_words] -def frequencies(): + +def frequencies(): + """统计words中单词的频率,并赋值给全局变量word_freqs""" global words global word_freqs word_freqs.extend([(word, 1) for word in words]) -def sort(): - global word_freqs + +def sort(): + """对word_freqs按照频率进行排序""" + global word_freqs word_freqs = Counter(words).most_common() if __name__ == "__main__": - read_file( testfilepath ) - extractwords() + read_file(testfilepath) + extractwords() frequencies() sort() for tf in word_freqs[:10]: - print(tf[0], '-', tf[1]) \ No newline at end of file + print(tf[0], '-', tf[1]) diff --git a/A 代码模式/11 基础结构/函数/2 临时变量传递.py b/A 代码模式/11 基础结构/函数/2 临时变量传递.py index 1059799..537c4f4 100644 --- a/A 代码模式/11 基础结构/函数/2 临时变量传递.py +++ b/A 代码模式/11 基础结构/函数/2 临时变量传递.py @@ -3,25 +3,30 @@ from cppy.cp_util import * def extractwords(str_data): + """提取单词""" pattern = re.compile('[\W_]+') word_list = pattern.sub(' ', str_data).lower().split() - stop_words = get_stopwords() - return [w for w in word_list if not w in stop_words] + stop_words = get_stopwords() + return [w for w in word_list if w not in stop_words] -def frequencies(word_list): - word_freqs = {} - for word in word_list: - word_freqs[word] = word_freqs.get(word, 0) + 1 + +def frequencies(word_list): + """统计单词频率""" + word_freqs = {} + for word in word_list: + word_freqs[word] = word_freqs.get(word, 0) + 1 return word_freqs -def sort(word_freq): - return sorted( word_freq.items(), key=lambda x: x[1], reverse=True ) + +def sort(word_freq): + """对单词频率进行排序""" + return sorted(word_freq.items(), key=lambda x: x[1], reverse=True) if __name__ == "__main__": - txtcontent = read_file( testfilepath ) - word_list = extractwords( txtcontent ) - word_freqs = frequencies( word_list ) - word_sorts = sort ( word_freqs ) + txtcontent = read_file(testfilepath) + word_list = extractwords(txtcontent) + word_freqs = frequencies(word_list) + word_sorts = sort(word_freqs) for tf in word_sorts[:10]: - print(tf[0], '-', tf[1]) \ No newline at end of file + print(tf[0], '-', tf[1]) diff --git a/A 代码模式/11 基础结构/函数/3 常见风格.py b/A 代码模式/11 基础结构/函数/3 常见风格.py index 38bb19c..7a72632 100644 --- a/A 代码模式/11 基础结构/函数/3 常见风格.py +++ b/A 代码模式/11 基础结构/函数/3 常见风格.py @@ -3,7 +3,7 @@ from collections import Counter from cppy.cp_util import * # 读取文件 -with open(testfilepath,encoding='utf-8') as f: +with open(testfilepath, encoding='utf-8') as f: data = f.read().lower() # 直接转换为小写 # 过滤非字母字符 @@ -20,6 +20,8 @@ words = [word for word in words if word not in stop_words] word_freqs = Counter(words) # 排序并打印 -sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True) +sorted_word_freqs = sorted(word_freqs.items(), + key=lambda x: x[1], + reverse=True) -print_word_freqs(sorted_word_freqs) \ No newline at end of file +print_word_freqs(sorted_word_freqs) diff --git a/A 代码模式/11 基础结构/函数/4 递归.py b/A 代码模式/11 基础结构/函数/4 递归.py index d38efa5..f4c7ca8 100644 --- a/A 代码模式/11 基础结构/函数/4 递归.py +++ b/A 代码模式/11 基础结构/函数/4 递归.py @@ -3,28 +3,31 @@ from collections import Counter stop_words = get_stopwords() + def process_chunk(chunk): # 过滤停用词 - words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ] + words = [w for w in chunk if (w not in stop_words) and len(w) >= 3] return Counter(words) -def process_chunks( chunks,word_freqs,x,max ): - next = x + 1 + +def process_chunks(chunks, word_freqs, x, max): + """递归处理分片""" + next = x + 1 if next < max: - process_chunks(chunks,word_freqs,next,max) - word_list = process_chunk(chunks[x]) + process_chunks(chunks, word_freqs, next, max) + word_list = process_chunk(chunks[x]) word_freqs += Counter(word_list) + # def process_chunks( chunks,word_freqs,x,max ): -# word_list = process_chunk(chunks[x]) +# word_list = process_chunk(chunks[x]) # word_freqs += Counter(word_list) # next = x + 1 # if next < max: -# process_chunks(chunks,word_freqs,next,max) - - +# process_chunks(chunks,word_freqs,next,max) + # 读数据,按1000个词一组分片 -chunks = get_chunks(testfilepath,2000) +chunks = get_chunks(testfilepath, 2000) word_freqs = Counter() -process_chunks( chunks,word_freqs,0,len(chunks) ) -print_word_freqs( word_freqs.most_common(10) ) \ No newline at end of file +process_chunks(chunks, word_freqs, 0, len(chunks)) +print_word_freqs(word_freqs.most_common(10)) diff --git a/A 代码模式/11 基础结构/流式调用/1 嵌套调用.py b/A 代码模式/11 基础结构/流式调用/1 嵌套调用.py index 777e689..7fb1ed5 100644 --- a/A 代码模式/11 基础结构/流式调用/1 嵌套调用.py +++ b/A 代码模式/11 基础结构/流式调用/1 嵌套调用.py @@ -1,3 +1,3 @@ from cppy.cp_util import * -print_word_freqs( sort_dict ( get_frequencies ( extract_file_words(testfilepath) ))) \ No newline at end of file +print_word_freqs(sort_dict(get_frequencies(extract_file_words(testfilepath)))) diff --git a/A 代码模式/11 基础结构/流式调用/2 方法bind.py b/A 代码模式/11 基础结构/流式调用/2 方法bind.py index 2df6ba5..b38df5a 100644 --- a/A 代码模式/11 基础结构/流式调用/2 方法bind.py +++ b/A 代码模式/11 基础结构/流式调用/2 方法bind.py @@ -2,14 +2,16 @@ from cppy.cp_util import * # 如果有连续的对数据加工操作,而且总是把共同加工数据对象当第一个参数,可以用本文件夹方法提升阅读体验 + # 框架类 class FunBind: - def bind(self, func,*args, **kwargs): + + def bind(self, func, *args, **kwargs): try: - self.data = func(self.data,*args, **kwargs) + self.data = func(self.data, *args, **kwargs) except: self.data = func(*args, **kwargs) - return self + return self data = FunBind()\ @@ -19,10 +21,9 @@ data = FunBind()\ .bind(print_word_freqs,10)\ .data print(data) - ''' 函数是自由函数,还是正常的函数写法 使用, - 列举函数名,首部参数外的其它参数 - 调用 data 得到最后数据 -''' \ No newline at end of file +''' diff --git a/A 代码模式/11 基础结构/流式调用/3 重载管道.py b/A 代码模式/11 基础结构/流式调用/3 重载管道.py index 6e3ec8a..7852927 100644 --- a/A 代码模式/11 基础结构/流式调用/3 重载管道.py +++ b/A 代码模式/11 基础结构/流式调用/3 重载管道.py @@ -1,28 +1,31 @@ from cppy.cp_util import * - ''' 函数是自由函数,还是正常的函数写法 使用, - 列举函数名,首部参数外的其它参数 - 调用 data 得到最后数据 ''' - + + class FunPipe: - def __init__(self, func, *args, **kwargs): + + def __init__(self, func, *args, **kwargs): self.func = func self.args = args self.kwargs = kwargs def __or__(self, other): - _data = self.func(*self.args, **self.kwargs) - return FunPipe( other.func,_data,*other.args,**other.kwargs) + _data = self.func(*self.args, **self.kwargs) + return FunPipe(other.func, _data, *other.args, **other.kwargs) @property def data(self): - return self.func(*self.args, **self.kwargs) + return self.func(*self.args, **self.kwargs) # 模仿管道 -pipe = FunPipe(extract_file_words,testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(print_word_freqs, 10) +pipe = FunPipe( + extract_file_words, + testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe( + print_word_freqs, 10) pipe.data - diff --git a/A 代码模式/11 基础结构/流式调用/4 类方法.py b/A 代码模式/11 基础结构/流式调用/4 类方法.py index f124fb2..0b28388 100644 --- a/A 代码模式/11 基础结构/流式调用/4 类方法.py +++ b/A 代码模式/11 基础结构/流式调用/4 类方法.py @@ -1,17 +1,19 @@ from cppy.cp_util import * + class Flow: + def extract_file_words(self, filepath): self.data = extract_file_words(filepath) return self - + def get_frequencies(self): self.data = get_frequencies(self.data) return self - + def sort_dict(self): self.data = sort_dict(self.data) - return self + return self def print_word_freqs(self, n): print_word_freqs(self.data, n) @@ -19,11 +21,11 @@ class Flow: # 顺序调用 -Flow().extract_file_words(testfilepath).get_frequencies().sort_dict().print_word_freqs(10) - +Flow().extract_file_words( + testfilepath).get_frequencies().sort_dict().print_word_freqs(10) ''' 连续方法调用,看起来比较舒服 但是需要假设: - 每一个类方法返回 self :否则,没法连续 - 类方法默认不写第一个参数,数据都在 .data 里面 -''' \ No newline at end of file +''' diff --git a/A 代码模式/11 基础结构/流式调用/5 类方法.py b/A 代码模式/11 基础结构/流式调用/5 类方法.py index 4de00c1..4344c39 100644 --- a/A 代码模式/11 基础结构/流式调用/5 类方法.py +++ b/A 代码模式/11 基础结构/流式调用/5 类方法.py @@ -1,45 +1,50 @@ from cppy.cp_util import * + # 装饰器改写类 # - 找到以f_开头的方法 # - 将方法函数的返回值赋值给对象的data属性 -# - 返回对象自身 +# - 返回对象自身 def return_self_decorator(cls): + def return_self(func): # 定义一个闭包函数,用于接收参数 - def wrapper(self, *args, **kwargs): - self.data = func(self, *args, **kwargs) - return self # 返回类自身 + def wrapper(self, *args, **kwargs): + self.data = func(self, *args, **kwargs) + return self # 返回类自身 + return wrapper - + for name, method in cls.__dict__.items(): # 判断属性是否可调用,且属性名以f_开头 - if callable(method) and name.startswith('f_'): + if callable(method) and name.startswith('f_'): # 为类改写属性,将封装后的函数赋值 - setattr(cls, name, return_self(method)) + setattr(cls, name, return_self(method)) return cls + @return_self_decorator -class Flow(): +class Flow(): + def test(self): return 'test' - + def f_extract_file_words(self, filepath): - return extract_file_words(filepath) - + return extract_file_words(filepath) + def f_get_frequencies(self): - return get_frequencies(self.data) - + return get_frequencies(self.data) + def f_sort_dict(self): - return sort_dict(self.data) + return sort_dict(self.data) def f_print_word_freqs(self, n): - print_word_freqs(self.data, n) + print_word_freqs(self.data, n) # 顺序调用 -Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10) - +Flow().f_extract_file_words( + testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10) ''' 改写后,参与 function flow 功能的方法 - 需要以 'f_' 开头 @@ -47,4 +52,4 @@ Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_pr 仍旧需要特殊的方法写法 所以,还是 1,2种方法比较自然 -''' \ No newline at end of file +''' diff --git a/A 代码模式/12 语言特性/1 类生成器/元类.py b/A 代码模式/12 语言特性/1 类生成器/元类.py index 53c9dc1..d69ab13 100644 --- a/A 代码模式/12 语言特性/1 类生成器/元类.py +++ b/A 代码模式/12 语言特性/1 类生成器/元类.py @@ -1,26 +1,30 @@ from cppy.cp_util import * from collections import Counter + # 定义一个带计数器的元类 class CounterMetaclass(type): - def __new__(mcs, name, bases, attrs): - attrs['_counter'] = Counter() + + def __new__(mcs, name, bases, attrs): + attrs['_counter'] = Counter() return super().__new__(mcs, name, bases, attrs) + # 基于元类创建类 -class Word( metaclass=CounterMetaclass ): +class Word(metaclass=CounterMetaclass): + def __init__(self, word): self.word = word self._counter[self.word] += 1 @classmethod - def get_word_freqs(cls,n) -> Counter: + def get_word_freqs(cls, n) -> Counter: return cls._counter.most_common(n) - -for word in extract_file_words ( testfilepath ) : Word(word) -print_word_freqs(Word.get_word_freqs(10)) +for word in extract_file_words(testfilepath): + Word(word) +print_word_freqs(Word.get_word_freqs(10)) ''' 常用于将依赖项(如服务或配置)自动注入到类中。 -''' \ No newline at end of file +'''