feat: 修改了代码以实现新的功能或API；

fix: 修复了代码中的一个错误； refactor: 重写/重构代码，但没有改变任何API行为； style: 添加了空格、格式化、缺失的分号等； test: 添加了缺失的测试或修正了现有的测试； docs: 更新了如readme等的文档； build: 更新了依赖项、项目版本； ops: 影响了操作性组件，如基础设施、部署、备份、恢复； chore: 修改了.gitignore等；
2 months ago · e2eab49065
parent 36afa1d669
commit e2eab49065
10 changed files with 117 additions and 84 deletions
--- a/基础结构/函数/1
+++ b/基础结构/函数/1
@@ -9,15 +9,19 @@ data = ''
 words = []
 word_freqs = []

+
 ################################
 # procedures
 ################################
 def read_file(path_to_file):
+    """读取文件内容，并赋值给全局变量data"""
    global data
    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()

+
 def extractwords():
+    """提取data中的单词，并赋值给全局变量words"""
    global data
    global words
    words = data.lower().split()
@@ -26,12 +30,16 @@ def extractwords():
    stop_words.update(string.ascii_lowercase)
    words = [word for word in words if word not in stop_words]

+
 def frequencies():
+    """统计words中单词的频率，并赋值给全局变量word_freqs"""
    global words
    global word_freqs
    word_freqs.extend([(word, 1) for word in words])

+
 def sort():
+    """对word_freqs按照频率进行排序"""
    global word_freqs
    word_freqs = Counter(words).most_common()

--- a/临时变量传递.py
+++ b/临时变量传递.py
@@ -3,18 +3,23 @@ from cppy.cp_util import *


 def extractwords(str_data):
+    """提取单词"""
    pattern = re.compile('[\W_]+')
    word_list = pattern.sub(' ', str_data).lower().split()
    stop_words = get_stopwords()
-    return [w for w in word_list if not w in stop_words]
+    return [w for w in word_list if w not in stop_words]
+

 def frequencies(word_list):
+    """统计单词频率"""
    word_freqs = {}
    for word in word_list:
        word_freqs[word] = word_freqs.get(word, 0) + 1
    return word_freqs

+
 def sort(word_freq):
+    """对单词频率进行排序"""
    return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)


--- a/基础结构/函数/3
+++ b/基础结构/函数/3
@@ -20,6 +20,8 @@ words = [word for word in words if word not in stop_words]
 word_freqs = Counter(words)

 # 排序并打印
-sorted_word_freqs = sorted(word_freqs.items(), key=lambda x: x[1], reverse=True)
+sorted_word_freqs = sorted(word_freqs.items(),
+                           key=lambda x: x[1],
+                           reverse=True)

 print_word_freqs(sorted_word_freqs)
--- a/基础结构/函数/4
+++ b/基础结构/函数/4
@@ -3,18 +3,22 @@ from collections import Counter

 stop_words = get_stopwords()

+
 def process_chunk(chunk):
    # 过滤停用词
-    words = [ w for w in chunk if ( not w in stop_words ) and len(w) >= 3 ]
+    words = [w for w in chunk if (w not in stop_words) and len(w) >= 3]
    return Counter(words)

+
 def process_chunks(chunks, word_freqs, x, max):
+    """递归处理分片"""
    next = x + 1
    if next < max:
        process_chunks(chunks, word_freqs, next, max)
    word_list = process_chunk(chunks[x])
    word_freqs += Counter(word_list)

+
 # def process_chunks( chunks,word_freqs,x,max ):
 #     word_list = process_chunk(chunks[x])
 #     word_freqs += Counter(word_list)
@@ -22,7 +26,6 @@ def process_chunks( chunks,word_freqs,x,max ):
 #     if next < max:
 #         process_chunks(chunks,word_freqs,next,max)

-  
 # 读数据，按1000个词一组分片
 chunks = get_chunks(testfilepath, 2000)
 word_freqs = Counter()
--- a/基础结构/流式调用/2
+++ b/基础结构/流式调用/2
@@ -2,8 +2,10 @@ from cppy.cp_util import *

 # 如果有连续的对数据加工操作，而且总是把共同加工数据对象当第一个参数，可以用本文件夹方法提升阅读体验

+
 # 框架类
 class FunBind:
+
    def bind(self, func, *args, **kwargs):
        try:
            self.data = func(self.data, *args, **kwargs)
@@ -19,7 +21,6 @@ data = FunBind()\
    .bind(print_word_freqs,10)\
    .data
 print(data)
-
 '''
 函数是自由函数,还是正常的函数写法
 使用，
--- a/基础结构/流式调用/3
+++ b/基础结构/流式调用/3
@@ -1,5 +1,4 @@
 from cppy.cp_util import *
-
 '''
 函数是自由函数,还是正常的函数写法
 使用，
@@ -7,7 +6,9 @@ from cppy.cp_util import *
  - 调用 data 得到最后数据
 '''

+
 class FunPipe:
+
    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
@@ -23,6 +24,8 @@ class FunPipe:


 # 模仿管道
-pipe = FunPipe(extract_file_words,testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(print_word_freqs, 10)
+pipe = FunPipe(
+    extract_file_words,
+    testfilepath) | FunPipe(get_frequencies) | FunPipe(sort_dict) | FunPipe(
+        print_word_freqs, 10)
 pipe.data
-
--- a/基础结构/流式调用/4
+++ b/基础结构/流式调用/4
@@ -1,6 +1,8 @@
 from cppy.cp_util import *

+
 class Flow:
+
    def extract_file_words(self, filepath):
        self.data = extract_file_words(filepath)
        return self
@@ -19,8 +21,8 @@ class Flow:


 # 顺序调用
-Flow().extract_file_words(testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
-
+Flow().extract_file_words(
+    testfilepath).get_frequencies().sort_dict().print_word_freqs(10)
 '''
 连续方法调用，看起来比较舒服
 但是需要假设：
--- a/基础结构/流式调用/5
+++ b/基础结构/流式调用/5
@@ -1,15 +1,18 @@
 from cppy.cp_util import *

+
 # 装饰器改写类
 # - 找到以f_开头的方法
 # - 将方法函数的返回值赋值给对象的data属性
 # - 返回对象自身
 def return_self_decorator(cls):
+
    def return_self(func):
        # 定义一个闭包函数，用于接收参数
        def wrapper(self, *args, **kwargs):
            self.data = func(self, *args, **kwargs)
            return self  # 返回类自身
+
        return wrapper

    for name, method in cls.__dict__.items():
@@ -19,8 +22,10 @@ def return_self_decorator(cls):
            setattr(cls, name, return_self(method))
    return cls

+
 @return_self_decorator
 class Flow():
+
    def test(self):
        return 'test'

@@ -38,8 +43,8 @@ class Flow():


 # 顺序调用
-Flow().f_extract_file_words(testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
-
+Flow().f_extract_file_words(
+    testfilepath).f_get_frequencies().f_sort_dict().f_print_word_freqs(10)
 '''
 改写后，参与 function flow 功能的方法 
 - 需要以 'f_' 开头
--- a/类生成器/元类.py
+++ b/类生成器/元类.py
@@ -1,14 +1,18 @@
 from cppy.cp_util import *
 from collections import Counter

+
 # 定义一个带计数器的元类
 class CounterMetaclass(type):
+
    def __new__(mcs, name, bases, attrs):
        attrs['_counter'] = Counter()
        return super().__new__(mcs, name, bases, attrs)

+
 # 基于元类创建类
 class Word(metaclass=CounterMetaclass):
+
    def __init__(self, word):
        self.word = word
        self._counter[self.word] += 1
@@ -18,9 +22,9 @@ class Word( metaclass=CounterMetaclass ):
        return cls._counter.most_common(n)


-for word in  extract_file_words ( testfilepath ) :  Word(word)
+for word in extract_file_words(testfilepath):
+    Word(word)
 print_word_freqs(Word.get_word_freqs(10))
-
 '''
 常用于将依赖项（如服务或配置）自动注入到类中。
 '''