大修 9

1 year ago · 44b0c00567
parent 83c156a3d5
commit 44b0c00567
6 changed files with 44 additions and 63 deletions
--- a/语言特性/反射/反射_类.py
+++ b/语言特性/反射/反射_类.py
@ -19,19 +19,17 @@ class sortTaskHandler:
 #  应用框架
 ##########################################
 def handle_task(task_type,*args):    
-    handler_class_name = f"{task_type}TaskHandler" # 构建处理器类名
+    handler_class_name = f"{task_type}TaskHandler" # 构建处理器类名   
-    
+
    # 使用globals()获取当前全局符号表
    handler_class = globals().get(handler_class_name)    
    if handler_class:        
        handler = handler_class() # 实例化处理器类        
        return handler.handle(*args) # 调用处理方法
    else:
-        print(f"No handler found for task type: {task_type}")       
+        print(f"No found for task type: {task_type}")       
-if __name__ == '__main__':   
+word_list = handle_task("words",util.testfilepath)
-    word_list = handle_task("words",util.testfilepath)
+word_freq = handle_task("frequencies",word_list)
-    word_freq = handle_task("frequencies",word_list)
+word_sort = handle_task("sort",word_freq)        
-    word_sort = handle_task("sort",word_freq)        
+util.print_word_freqs(word_sort)
    util.print_word_freqs(word_sort)
--- a/语言特性/异步.py
+++ b/语言特性/异步.py
@ -5,7 +5,7 @@ from cppy.cp_util import *
 #
-# 协程: 有点复杂
+# 协程: 有点复杂; 读文件的Io还是太快，的爬虫
 #
 async def read_file(file_path):
    async with aiofiles.open(file_path, 'r', encoding='utf-8') as file:
--- a/语言特性/递归.py
+++ b/语言特性/递归.py
@ -0,0 +1,30 @@
 from cppy.cp_util import *
 from collections import Counter
 # Words excluded from counting; loaded once at import time via get_stopwords()
 # (presumably provided by the cppy.cp_util star import — confirm).
 stop_words = get_stopwords()
 def process_chunk(chunk):
    """Count the words of one chunk.

    Words found in the module-level ``stop_words`` and words shorter than
    three characters are left out of the returned ``Counter``.
    """
    kept = []
    for token in chunk:
        # Same test order as before: stop-word membership first, then length.
        if token in stop_words or len(token) < 3:
            continue
        kept.append(token)
    return Counter(kept)
 def process_chunks( chunks,word_freqs,x,max ):
    """Recursively merge the word counts of chunks[x:max] into word_freqs.

    The recursive call is made before the current chunk is counted, so the
    chunk with the highest index is merged first and chunk ``x`` last.
    ``word_freqs`` is updated in place; nothing is returned.
    """
    # Descend to the following chunk first, as long as one remains below max.
    if x + 1 < max:
        process_chunks(chunks, word_freqs, x + 1, max)
    # On the way back up, fold this chunk's counts into the shared Counter.
    word_freqs += Counter(process_chunk(chunks[x]))
 # def process_chunks( chunks,word_freqs,x,max ):
 #     word_list = process_chunk(chunks[x])    
 #     word_freqs += Counter(word_list)
 #     next  = x + 1
 #     if next < max:
 #         process_chunks(chunks,word_freqs,next,max)   
 # Read the data and split it into chunks; 2000 is the size argument handed to
 # get_chunks (presumably words per chunk — the earlier comment said 1000; confirm).
 chunks = get_chunks(testfilepath,2000)
 word_freqs = Counter()
 # Recursively fold every chunk's counts into word_freqs, starting at index 0.
 process_chunks( chunks,word_freqs,0,len(chunks) )
 # Hand the ten most common (word, count) pairs to print_word_freqs.
 print_word_freqs( word_freqs.most_common(10) )
--- a/语言特性/递归/test.txt
+++ b/语言特性/递归/test.txt
@ -1,2 +0,0 @@
 " my Some sure acquaintance or other, my dear, sure,other  I suppose; I am sure I do not
 know. sure "
--- a/语言特性/递归/tf-08.py
+++ b/语言特性/递归/tf-08.py
@ -1,29 +0,0 @@
 import sys
 from cppy.cp_util import *
 ## The chunk-splitting work could be unified into one generic function. Make it a generator!!
 # Locate test.txt in the same directory as this script.
 script_dir = os.path.dirname(os.path.abspath(__file__))  
 testfile = os.path.join(script_dir, 'test.txt')
 stop_words = get_stopwords()
 # If it crashes, change the 5000
 RECURSION_LIMIT = 5000
 sys.setrecursionlimit( RECURSION_LIMIT )
 def count( i,chunks, stopwords, wordfreqs):
    """Recursively tally words from chunks[i] down to chunks[0].

    Words contained in ``stopwords`` are skipped. ``wordfreqs`` is a mapping
    of word -> count that is updated in place; the function returns None.
    A negative ``i`` ends the recursion.
    """
    if i < 0:
        return
    for token in chunks[i]:
        if token in stopwords:
            continue
        # Start unseen words at zero, then add one occurrence.
        seen_so_far = wordfreqs.get(token, 0)
        wordfreqs[token] = seen_so_far + 1
    # Move on to the previous chunk.
    count(i - 1, chunks, stopwords, wordfreqs)
 # Read the whole test file and split it into words; the with-block closes
 # the file handle instead of leaking it.
 with open(testfile,encoding='utf-8') as source:
    word_list = re_split( source.read() )
 filesize = len( word_list )
 # Words per chunk, chosen so that the number of chunks never exceeds
 # RECURSION_LIMIT (count() recurses once per chunk).
 chunk_size = ( filesize // RECURSION_LIMIT ) + 1
 # Consecutive, non-overlapping slices of chunk_size words each. The previous
 # slice end, (x+1)*RECURSION_LIMIT, made chunks overlap and over-counted words
 # once the file held RECURSION_LIMIT words or more.
 chunks = [  word_list[ start:start+chunk_size ]
                for start in range(0, filesize, chunk_size)  ]
 word_freqs = {}
 # Start from the last chunk; count() walks back down to index 0.
 # NOTE(review): with a chunk count close to RECURSION_LIMIT the extra frames
 # can still exceed the interpreter limit — raise RECURSION_LIMIT if so.
 count( len(chunks) - 1 ,chunks, stop_words, word_freqs )
 print_word_freqs(sort_dict(word_freqs))
--- a/计算设备/存储/数据库/数据库.py
+++ b/计算设备/存储/数据库/数据库.py
@ -4,20 +4,10 @@ from cppy.cp_util import testfilepath,db_filename,extract_file_words
 # 数据库表结构
 TABLES = {
    'documents': '''CREATE TABLE IF NOT EXISTS documents (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        name TEXT NOT NULL
                    )''',
    'words': '''CREATE TABLE IF NOT EXISTS words (
-                    doc_id INTEGER NOT NULL,
+                    doc_name INTEGER NOT NULL,
-                    value TEXT NOT NULL,
+                    value TEXT NOT NULL
-                    FOREIGN KEY (doc_id) REFERENCES documents (id)
+                )''',    
                )''',
    'characters': '''CREATE TABLE IF NOT EXISTS characters (
                        word_id INTEGER NOT NULL,
                        value TEXT NOT NULL,
                        FOREIGN KEY (word_id) REFERENCES words (id)
                )'''
 }
@ -33,15 +23,10 @@ def create_db_schema(connection):
 def load_file_into_database(path_to_file, connection):        
    words = extract_file_words( path_to_file )
-    c = connection.cursor()
+    doc_name = os.path.basename(testfilepath).split('.')[0]     
-    c.execute("INSERT INTO documents (name) VALUES (?)", (path_to_file,))
+    c = connection.cursor()        
    doc_id = c.lastrowid
    for w in words:
-        c.execute("INSERT INTO words (doc_id, value) VALUES (?, ?)", (doc_id, w))
+        c.execute("INSERT INTO words (doc_name, value) VALUES (?, ?)", (doc_name, w))        
        word_id = c.lastrowid
        for char in w:
            c.execute("INSERT INTO characters (word_id, value) VALUES (?, ?)", (word_id, char))
    connection.commit()
    c.close()
@ -52,7 +37,6 @@ def load_file_into_database(path_to_file, connection):
 # Build the full path of the database file
 current_dir = os.path.dirname(os.path.abspath(__file__))   
 db_file_path = os.path.join(current_dir, db_filename)    
 # Remove the database file if it already exists.
 if os.path.exists(db_file_path):          
    os.remove(db_file_path)
		`@ -1,2 +0,0 @@`
			`" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not`
			`know. sure "`