feat: 修改了代码以实现新的功能或API；

fix: 修复了代码中的一个错误； refactor: 重写/重构代码，但没有改变任何API行为； style: 添加了空格、格式化、缺失的分号等； test: 添加了缺失的测试或修正了现有的测试； docs: 更新了如readme等的文档； build: 更新了依赖项、项目版本； ops: 影响了操作性组件，如基础设施、部署、备份、恢复； chore: 修改了.gitignore等；
2 years ago · e2eab49065
parent 36afa1d669
commit e2eab49065
10 changed files with 117 additions and 84 deletions
--- a/基础结构/函数/1
+++ b/基础结构/函数/1
@@ -9,38 +9,46 @@ data = ''
 words = []
 word_freqs = []
 ################################
 # procedures
 ################################
 def read_file(path_to_file):
    """读取文件内容，并赋值给全局变量data"""
    global data
-    with open(path_to_file,encoding='utf-8') as f:
+    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()
-def extractwords():    
+
 def extractwords():
    """提取data中的单词，并赋值给全局变量words"""
    global data
-    global words    
+    global words
    words = data.lower().split()
    with open(stopwordfilepath) as f:
-        stop_words = set(f.read().split(','))    
+        stop_words = set(f.read().split(','))
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]
-def frequencies():    
+
 def frequencies():
    """统计words中单词的频率，并赋值给全局变量word_freqs"""
    global words
    global word_freqs
    word_freqs.extend([(word, 1) for word in words])
-def sort():    
+
-    global word_freqs    
+def sort():
    """对word_freqs按照频率进行排序"""
    global word_freqs
    word_freqs = Counter(words).most_common()
 if __name__ == "__main__":
-    read_file( testfilepath )
+    read_file(testfilepath)
-    extractwords()    
+    extractwords()
    frequencies()
    sort()
    for tf in word_freqs[:10]:
-        print(tf[0], '-', tf[1])
+        print(tf[0], '-', tf[1])
--- a/临时变量传递.py
+++ b/临时变量传递.py
@@ -3,25 +3,30 @@ from cppy.cp_util import *
 def extractwords(str_data):
    """提取单词"""
    pattern = re.compile('[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
-    stop_words = get_stopwords()    
+    stop_words = get_stopwords()
-    return [w for w in word_list if not w in stop_words]
+    return [w for w in word_list if w not in stop_words]
-def frequencies(word_list):    
+
-    word_freqs = {}  
+def frequencies(word_list):
-    for word in word_list:  
+    """统计单词频率"""
-        word_freqs[word] = word_freqs.get(word, 0) + 1    
+    word_freqs = {}
    for word in word_list:
        word_freqs[word] = word_freqs.get(word, 0) + 1
    return word_freqs
-def sort(word_freq):    
+
-    return sorted( word_freq.items(), key=lambda x: x[1], reverse=True )
+def sort(word_freq):
    """对单词频率进行排序"""
    return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
 if __name__ == "__main__":
-    txtcontent = read_file( testfilepath )
+    txtcontent = read_file(testfilepath)
-    word_list = extractwords( txtcontent )
+    word_list = extractwords(txtcontent)
-    word_freqs = frequencies( word_list )     
+    word_freqs = frequencies(word_list)
-    word_sorts = sort ( word_freqs )
+    word_sorts = sort(word_freqs)
    for tf in word_sorts[:10]:
-        print(tf[0], '-', tf[1])  
+        print(tf[0], '-', tf[1])
--- a/基础结构/函数/3
+++ b/基础结构/函数/3
@@ -3,7 +3,7 @@ from collections import Counter
 from cppy.cp_util import *
 # 读取文件
-with open(testfilepath,encoding='utf-8') as f:
+with open(testfilepath, encoding='utf-8') as f:
    data = f.read().lower()  # 直接转换为小写
 # 过滤非字母字符
@@ -20,6 +20,8 @@ words = [word for word in words if word not in stop_words]
 word_freqs = Counter(words)
 # 排序并打印
-sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
+sorted_word_freqs = sorted(word_freqs.items(),
                           key=lambda x: x[1],
                           reverse=True)
-print_word_freqs(sorted_word_freqs)
+print_word_freqs(sorted_word_freqs)
--- a/基础结构/函数/4
+++ b/基础结构/函数/4
@@ -3,28 +3,31 @@ from collections import Counter
 stop_words = get_stopwords()
 def process_chunk(chunk):
    # 过滤停用词
-    words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
+    words = [w for w in chunk if (w not in stop_words) and len(w) >= 3]
    return Counter(words)
-def process_chunks( chunks,word_freqs,x,max ):
+
-    next  = x + 1
+def process_chunks(chunks, word_freqs, x, max):
    """递归处理分片"""
    next = x + 1
    if next < max:
-        process_chunks(chunks,word_freqs,next,max)   
+        process_chunks(chunks, word_freqs, next, max)
-    word_list = process_chunk(chunks[x])    
+    word_list = process_chunk(chunks[x])
    word_freqs += Counter(word_list)
 # def process_chunks( chunks,word_freqs,x,max ):
-#     word_list = process_chunk(chunks[x])    
+#     word_list = process_chunk(chunks[x])
 #     word_freqs += Counter(word_list)
 #     next  = x + 1
 #     if next < max:
-#         process_chunks(chunks,word_freqs,next,max)   
+#         process_chunks(chunks,word_freqs,next,max)
-  
+
 # 读数据，按1000个词一组分片
-chunks = get_chunks(testfilepath,2000)
+chunks = get_chunks(testfilepath, 2000)
 word_freqs = Counter()
-process_chunks( chunks,word_freqs,0,len(chunks) )
+process_chunks(chunks, word_freqs, 0, len(chunks))
-print_word_freqs( word_freqs.most_common(10) )
+print_word_freqs(word_freqs.most_common(10))
--- a/基础结构/流式调用/1
+++ b/基础结构/流式调用/1
@@ -1,3 +1,3 @@
 from cppy.cp_util import *
-print_word_freqs( sort_dict ( get_frequencies ( extract_file_words(testfilepath) )))
+print_word_freqs(sort_dict(get_frequencies(extract_file_words(testfilepath))))
--- a/基础结构/流式调用/2
+++ b/基础结构/流式调用/2
@@ -2,14 +2,16 @@ from cppy.cp_util import *
 # 如果有连续的对数据加工操作，而且总是把共同加工数据对象当第一个参数，可以用本文件夹方法提升阅读体验
 # 框架类
 class FunBind:
-    def bind(self, func,*args, **kwargs):
+
    def bind(self, func, *args, **kwargs):
        try:
-            self.data = func(self.data,*args, **kwargs)
+            self.data = func(self.data, *args, **kwargs)
        except:
            self.data = func(*args, **kwargs)
-        return self    
+        return self
 data = FunBind()\
@@ -19,10 +21,9 @@ data = FunBind()\
    .bind(print_word_freqs,10)\
    .data
 print(data)
 '''
 函数是自由函数,还是正常的函数写法
 使用，
  - 列举函数名，首部参数外的其它参数
  - 调用 data 得到最后数据
-'''
+'''
--- a/基础结构/流式调用/3
+++ b/基础结构/流式调用/3
@@ -1,28 +1,31 @@
 from cppy.cp_util import *
 '''
 函数是自由函数,还是正常的函数写法
 使用，
  - 列举函数名，首部参数外的其它参数
  - 调用 data 得到最后数据
 '''
-  
+
 class FunPipe:
-    def __init__(self, func, *args, **kwargs):        
+
    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs
    def __or__(self, other):
-        _data = self.func(*self.args, **self.kwargs)                     
+        _data = self.func(*self.args, **self.kwargs)
-        return FunPipe( other.func,_data,*other.args,**other.kwargs)
+        return FunPipe(other.func, _data, *other.args, **other.kwargs)
    @property
    def data(self):
-        return self.func(*self.args, **self.kwargs)   
+        return self.func(*self.args, **self.kwargs)
 # 模仿管道
-pipe = FunPipe(extract_file_words,testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(print_word_freqs, 10)
+pipe = FunPipe(
    extract_file_words,
    testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(
        print_word_freqs, 10)
 pipe.data
--- a/基础结构/流式调用/4
+++ b/基础结构/流式调用/4
@@ -1,17 +1,19 @@
 from cppy.cp_util import *
 class Flow:
    def extract_file_words(self, filepath):
        self.data = extract_file_words(filepath)
        return self
-    
+
    def get_frequencies(self):
        self.data = get_frequencies(self.data)
        return self
-    
+
    def sort_dict(self):
        self.data = sort_dict(self.data)
-        return self        
+        return self
    def print_word_freqs(self, n):
        print_word_freqs(self.data, n)
@@ -19,11 +21,11 @@ class Flow:
 # 顺序调用
-Flow().extract_file_words(testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
+Flow().extract_file_words(
-
+    testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
 '''
 连续方法调用，看起来比较舒服
 但是需要假设：
 - 每一个类方法返回 self  ：否则，没法连续
 - 类方法默认不写第一个参数，数据都在 .data 里面
-'''
+'''
--- a/基础结构/流式调用/5
+++ b/基础结构/流式调用/5
@@ -1,45 +1,50 @@
 from cppy.cp_util import *
 # 装饰器改写类
 # - 找到以f_开头的方法
 # - 将方法函数的返回值赋值给对象的data属性
-# - 返回对象自身 
+# - 返回对象自身
 def return_self_decorator(cls):
    def return_self(func):
        # 定义一个闭包函数，用于接收参数
-        def wrapper(self, *args, **kwargs):            
+        def wrapper(self, *args, **kwargs):
-            self.data = func(self, *args, **kwargs)            
+            self.data = func(self, *args, **kwargs)
-            return self # 返回类自身        
+            return self  # 返回类自身
        return wrapper
-    
+
    for name, method in cls.__dict__.items():
        # 判断属性是否可调用，且属性名以f_开头
-        if callable(method) and name.startswith('f_'):            
+        if callable(method) and name.startswith('f_'):
            # 为类改写属性，将封装后的函数赋值
-            setattr(cls, name, return_self(method))    
+            setattr(cls, name, return_self(method))
    return cls
@return_self_decorator
-class Flow():        
+class Flow():
    def test(self):
        return 'test'
-    
+
    def f_extract_file_words(self, filepath):
-        return extract_file_words(filepath)        
+        return extract_file_words(filepath)
-    
+
    def f_get_frequencies(self):
-        return get_frequencies(self.data)        
+        return get_frequencies(self.data)
-    
+
    def f_sort_dict(self):
-        return sort_dict(self.data)        
+        return sort_dict(self.data)
    def f_print_word_freqs(self, n):
-        print_word_freqs(self.data, n)        
+        print_word_freqs(self.data, n)
 # 顺序调用
-Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
+Flow().f_extract_file_words(
-
+    testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
 '''
 改写后，参与 function flow 功能的方法 
 - 需要以 'f_' 开头
@@ -47,4 +52,4 @@ Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_pr
 仍旧需要特殊的方法写法
 所以，还是 1，2种方法比较自然
-'''
+'''
--- a/类生成器/元类.py
+++ b/类生成器/元类.py
@ -1,26 +1,30 @@
 from cppy.cp_util import *
 from collections import Counter
 # 定义一个带计数器的元类
 class CounterMetaclass(type):
-    def __new__(mcs, name, bases, attrs):         
+
-        attrs['_counter'] =  Counter()
+    def __new__(mcs, name, bases, attrs):
        attrs['_counter'] = Counter()
        return super().__new__(mcs, name, bases, attrs)
 # 基于元类创建类
-class Word( metaclass=CounterMetaclass ):
+class Word(metaclass=CounterMetaclass):
    def __init__(self, word):
        self.word = word
        self._counter[self.word] += 1
    @classmethod
-    def get_word_freqs(cls,n) -> Counter:
+    def get_word_freqs(cls, n) -> Counter:
        return cls._counter.most_common(n)
 for word in  extract_file_words ( testfilepath ) :  Word(word)
 print_word_freqs(Word.get_word_freqs(10))
 for word in extract_file_words(testfilepath):
    Word(word)
 print_word_freqs(Word.get_word_freqs(10))
 '''
 常用于将依赖项（如服务或配置）自动注入到类中。
-'''
+'''
`@@ -1,3 +1,3 @@`
	`from cppy.cp_util import *`	`from cppy.cp_util import *`

	`print_word_freqs( sort_dict ( get_frequencies ( extract_file_words(testfilepath) )))`	`print_word_freqs(sort_dict(get_frequencies(extract_file_words(testfilepath))))`