feat: 修改了代码以实现新的功能或API；

fix: 修复了代码中的一个错误；
refactor: 重写/重构代码，但没有改变任何API行为；
style: 添加了空格、格式化、缺失的分号等；
test: 添加了缺失的测试或修正了现有的测试；
docs: 更新了 README 等文档；
build: 更新了依赖项、项目版本；
ops: 影响了运维相关组件，如基础设施、部署、备份、恢复；
chore: 其他杂项修改，如修改 .gitignore 等。
1 year ago · e2eab49065
parent 36afa1d669
commit e2eab49065
10 changed files with 117 additions and 84 deletions
--- a/基础结构/函数/1
+++ b/基础结构/函数/1
@@ -9,38 +9,46 @@ data = ''
 words = []
 word_freqs = []

+
 ################################
 # procedures
 ################################
 def read_file(path_to_file):
+    """读取文件内容，并赋值给全局变量data"""
    global data
-    with open(path_to_file,encoding='utf-8') as f:
+    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()

-def extractwords():    
+
+def extractwords():
+    """提取data中的单词，并赋值给全局变量words"""
    global data
-    global words    
+    global words
    words = data.lower().split()
    with open(stopwordfilepath) as f:
-        stop_words = set(f.read().split(','))    
+        stop_words = set(f.read().split(','))
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]

-def frequencies():    
+
+def frequencies():
+    """统计words中单词的频率，并赋值给全局变量word_freqs"""
    global words
    global word_freqs
    word_freqs.extend([(word, 1) for word in words])

-def sort():    
-    global word_freqs    
+
+def sort():
+    """对word_freqs按照频率进行排序"""
+    global word_freqs
    word_freqs = Counter(words).most_common()


 if __name__ == "__main__":
-    read_file( testfilepath )
-    extractwords()    
+    read_file(testfilepath)
+    extractwords()
    frequencies()
    sort()

    for tf in word_freqs[:10]:
-        print(tf[0], '-', tf[1])
+        print(tf[0], '-', tf[1])
--- a/临时变量传递.py
+++ b/临时变量传递.py
@@ -3,25 +3,30 @@ from cppy.cp_util import *


 def extractwords(str_data):
+    """提取单词"""
    pattern = re.compile('[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
-    stop_words = get_stopwords()    
-    return [w for w in word_list if not w in stop_words]
+    stop_words = get_stopwords()
+    return [w for w in word_list if w not in stop_words]

-def frequencies(word_list):    
-    word_freqs = {}  
-    for word in word_list:  
-        word_freqs[word] = word_freqs.get(word, 0) + 1    
+
+def frequencies(word_list):
+    """统计单词频率"""
+    word_freqs = {}
+    for word in word_list:
+        word_freqs[word] = word_freqs.get(word, 0) + 1
    return word_freqs

-def sort(word_freq):    
-    return sorted( word_freq.items(), key=lambda x: x[1], reverse=True )
+
+def sort(word_freq):
+    """对单词频率进行排序"""
+    return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)


 if __name__ == "__main__":
-    txtcontent = read_file( testfilepath )
-    word_list = extractwords( txtcontent )
-    word_freqs = frequencies( word_list )     
-    word_sorts = sort ( word_freqs )
+    txtcontent = read_file(testfilepath)
+    word_list = extractwords(txtcontent)
+    word_freqs = frequencies(word_list)
+    word_sorts = sort(word_freqs)
    for tf in word_sorts[:10]:
-        print(tf[0], '-', tf[1])  
+        print(tf[0], '-', tf[1])
--- a/基础结构/函数/3
+++ b/基础结构/函数/3
@@ -3,7 +3,7 @@ from collections import Counter
 from cppy.cp_util import *

 # 读取文件
-with open(testfilepath,encoding='utf-8') as f:
+with open(testfilepath, encoding='utf-8') as f:
    data = f.read().lower()  # 直接转换为小写

 # 过滤非字母字符
@@ -20,6 +20,8 @@ words = [word for word in words if word not in stop_words]
 word_freqs = Counter(words)

 # 排序并打印
-sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
+sorted_word_freqs = sorted(word_freqs.items(),
+                           key=lambda x: x[1],
+                           reverse=True)

-print_word_freqs(sorted_word_freqs)
+print_word_freqs(sorted_word_freqs)
--- a/基础结构/函数/4
+++ b/基础结构/函数/4
@@ -3,28 +3,31 @@ from collections import Counter

 stop_words = get_stopwords()

+
 def process_chunk(chunk):
    # 过滤停用词
-    words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
+    words = [w for w in chunk if (w not in stop_words) and len(w) >= 3]
    return Counter(words)

-def process_chunks( chunks,word_freqs,x,max ):
-    next  = x + 1
+
+def process_chunks(chunks, word_freqs, x, max):
+    """递归处理分片"""
+    next = x + 1
    if next < max:
-        process_chunks(chunks,word_freqs,next,max)   
-    word_list = process_chunk(chunks[x])    
+        process_chunks(chunks, word_freqs, next, max)
+    word_list = process_chunk(chunks[x])
    word_freqs += Counter(word_list)

+
 # def process_chunks( chunks,word_freqs,x,max ):
-#     word_list = process_chunk(chunks[x])    
+#     word_list = process_chunk(chunks[x])
 #     word_freqs += Counter(word_list)
 #     next  = x + 1
 #     if next < max:
-#         process_chunks(chunks,word_freqs,next,max)   
-  
-  
+#         process_chunks(chunks,word_freqs,next,max)
+
 # 读数据，按2000个词一组分片
-chunks = get_chunks(testfilepath,2000)
+chunks = get_chunks(testfilepath, 2000)
 word_freqs = Counter()
-process_chunks( chunks,word_freqs,0,len(chunks) )
-print_word_freqs( word_freqs.most_common(10) )
+process_chunks(chunks, word_freqs, 0, len(chunks))
+print_word_freqs(word_freqs.most_common(10))
--- a/基础结构/流式调用/1
+++ b/基础结构/流式调用/1
@@ -1,3 +1,3 @@
 from cppy.cp_util import *

-print_word_freqs( sort_dict ( get_frequencies ( extract_file_words(testfilepath) )))
+print_word_freqs(sort_dict(get_frequencies(extract_file_words(testfilepath))))
--- a/基础结构/流式调用/2
+++ b/基础结构/流式调用/2
@@ -2,14 +2,16 @@ from cppy.cp_util import *

 # 如果有连续的对数据加工操作，而且总是把共同加工数据对象当第一个参数，可以用本文件夹方法提升阅读体验

+
 # 框架类
 class FunBind:
-    def bind(self, func,*args, **kwargs):
+
+    def bind(self, func, *args, **kwargs):
        try:
-            self.data = func(self.data,*args, **kwargs)
+            self.data = func(self.data, *args, **kwargs)
        except:
            self.data = func(*args, **kwargs)
-        return self    
+        return self


 data = FunBind()\
@@ -19,10 +21,9 @@ data = FunBind()\
    .bind(print_word_freqs,10)\
    .data
 print(data)
-
 '''
 函数是自由函数,还是正常的函数写法
 使用，
  - 列举函数名，首部参数外的其它参数
  - 调用 data 得到最后数据
-'''
+'''
--- a/基础结构/流式调用/3
+++ b/基础结构/流式调用/3
@@ -1,28 +1,31 @@
 from cppy.cp_util import *
-
 '''
 函数是自由函数,还是正常的函数写法
 使用，
  - 列举函数名，首部参数外的其它参数
  - 调用 data 得到最后数据
 '''
-  
+
+
 class FunPipe:
-    def __init__(self, func, *args, **kwargs):        
+
+    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __or__(self, other):
-        _data = self.func(*self.args, **self.kwargs)                     
-        return FunPipe( other.func,_data,*other.args,**other.kwargs)
+        _data = self.func(*self.args, **self.kwargs)
+        return FunPipe(other.func, _data, *other.args, **other.kwargs)

    @property
    def data(self):
-        return self.func(*self.args, **self.kwargs)   
+        return self.func(*self.args, **self.kwargs)


 # 模仿管道
-pipe = FunPipe(extract_file_words,testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(print_word_freqs, 10)
+pipe = FunPipe(
+    extract_file_words,
+    testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(
+        print_word_freqs, 10)
 pipe.data
-
--- a/基础结构/流式调用/4
+++ b/基础结构/流式调用/4
@@ -1,17 +1,19 @@
 from cppy.cp_util import *

+
 class Flow:
+
    def extract_file_words(self, filepath):
        self.data = extract_file_words(filepath)
        return self
-    
+
    def get_frequencies(self):
        self.data = get_frequencies(self.data)
        return self
-    
+
    def sort_dict(self):
        self.data = sort_dict(self.data)
-        return self        
+        return self

    def print_word_freqs(self, n):
        print_word_freqs(self.data, n)
@@ -19,11 +21,11 @@ class Flow:


 # 顺序调用
-Flow().extract_file_words(testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
-
+Flow().extract_file_words(
+    testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
 '''
 连续方法调用，看起来比较舒服
 但是需要假设：
 - 每一个类方法返回 self  ：否则，没法连续
 - 类方法默认不写第一个参数，数据都在 .data 里面
-'''
+'''
--- a/基础结构/流式调用/5
+++ b/基础结构/流式调用/5
@@ -1,45 +1,50 @@
 from cppy.cp_util import *

+
 # 装饰器改写类
 # - 找到以f_开头的方法
 # - 将方法函数的返回值赋值给对象的data属性
-# - 返回对象自身 
+# - 返回对象自身
 def return_self_decorator(cls):
+
    def return_self(func):
        # 定义一个闭包函数，用于接收参数
-        def wrapper(self, *args, **kwargs):            
-            self.data = func(self, *args, **kwargs)            
-            return self # 返回类自身        
+        def wrapper(self, *args, **kwargs):
+            self.data = func(self, *args, **kwargs)
+            return self  # 返回类自身
+
        return wrapper
-    
+
    for name, method in cls.__dict__.items():
        # 判断属性是否可调用，且属性名以f_开头
-        if callable(method) and name.startswith('f_'):            
+        if callable(method) and name.startswith('f_'):
            # 为类改写属性，将封装后的函数赋值
-            setattr(cls, name, return_self(method))    
+            setattr(cls, name, return_self(method))
    return cls

+
@return_self_decorator
-class Flow():        
+class Flow():
+
    def test(self):
        return 'test'
-    
+
    def f_extract_file_words(self, filepath):
-        return extract_file_words(filepath)        
-    
+        return extract_file_words(filepath)
+
    def f_get_frequencies(self):
-        return get_frequencies(self.data)        
-    
+        return get_frequencies(self.data)
+
    def f_sort_dict(self):
-        return sort_dict(self.data)        
+        return sort_dict(self.data)

    def f_print_word_freqs(self, n):
-        print_word_freqs(self.data, n)        
+        print_word_freqs(self.data, n)


 # 顺序调用
-Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
-
+Flow().f_extract_file_words(
+    testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
 '''
 改写后，参与 function flow 功能的方法 
 - 需要以 'f_' 开头
@@ -47,4 +52,4 @@ Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_pr

 仍旧需要特殊的方法写法
 所以，还是 1，2种方法比较自然
-'''
+'''
--- a/类生成器/元类.py
+++ b/类生成器/元类.py
@@ -1,26 +1,30 @@
 from cppy.cp_util import *
 from collections import Counter

+
 # 定义一个带计数器的元类
 class CounterMetaclass(type):
-    def __new__(mcs, name, bases, attrs):         
-        attrs['_counter'] =  Counter()
+
+    def __new__(mcs, name, bases, attrs):
+        attrs['_counter'] = Counter()
        return super().__new__(mcs, name, bases, attrs)

+
 # 基于元类创建类
-class Word( metaclass=CounterMetaclass ):
+class Word(metaclass=CounterMetaclass):
+
    def __init__(self, word):
        self.word = word
        self._counter[self.word] += 1

    @classmethod
-    def get_word_freqs(cls,n) -> Counter:
+    def get_word_freqs(cls, n) -> Counter:
        return cls._counter.most_common(n)
-    

-for word in  extract_file_words ( testfilepath ) :  Word(word)
-print_word_freqs(Word.get_word_freqs(10))

+for word in extract_file_words(testfilepath):
+    Word(word)
+print_word_freqs(Word.get_word_freqs(10))
 '''
 常用于将依赖项（如服务或配置）自动注入到类中。
-'''
+'''