update

2 years ago · 3c439ef8d7
parent 445088fde8
commit 3c439ef8d7
5 changed files with 90 additions and 81 deletions
--- a/交互/Agent/80.py
+++ b/交互/Agent/80.py
@ -27,12 +27,7 @@ def compute_all_word_frequencies(agents):
        future_to_agent = {executor.submit(agent.compute_word_frequency): agent for agent in agents}  
        for future in concurrent.futures.as_completed(future_to_agent):  
            agent = future_to_agent[future]  
-            try:  
-                # 获取计算结果，但不处理异常  
-                data = future.result()  
-            except Exception as exc:  
-                print(f'生成 {agent.text_chunk[:10]}... 的词频时出错: {exc}')  
-            # 词频已经被保存在agent中
+            data = future.result()   # 词频被保存在agent中


 # 所有Agent计算完成后，合并它们的词频结果
@ -48,5 +43,4 @@ if __name__ == '__main__':
    agents = create_agents(words)  # 创建代理
    compute_all_word_frequencies(agents)  # 计算
    merged_word_freq = merge_word_frequencies(agents)   # 合并结果    
-    for (w, c) in merged_word_freq.most_common(10):  # 排序输出
-        print(w, '-', c)
+    util.print_word_freqs(merged_word_freq.most_common(10))  # 排序输出    
--- a/基本结构/共享对象/tf-38.py
+++ b/基本结构/共享对象/tf-38.py
--- a/基本结构/函数缓存/84.py
+++ b/基本结构/函数缓存/84.py
--- a/计算设备/map-reduce/tf_91.py
+++ b/计算设备/map-reduce/tf_91.py
@ -2,16 +2,23 @@ import threading
 from collections import Counter
 from cppy.cp_util import *

-stop_words = get_stopwords()  
-
-# 定义一个函数来计算每个线程的词频
-def count_words(start, end, text, result_index, results):    
-    words = re_split( text[start:end] )
-    words = [w for w in words if not w in stop_words]    
-    result = Counter(words)
-    results[result_index] = result
-
-if __name__ == '__main__':
+#
+# Multithreaded version
+# 
+def process_chunk(start, end, text, result_index, results):   
+    # Thread worker: count the words of text[start:end] and store the Counter in results[result_index]. Per the original note, extract_str_words splits into words and filters stop words (helper not shown here; confirm).
+    words = extract_str_words( text[start:end] )     
+    results[result_index] = Counter(words)
+
+def merge_counts(counts_list):  
+    # Merge multiple Counter objects: add every Counter in counts_list into one fresh Counter and return it.
+    total_counts = Counter()  
+    for counts in counts_list:  
+        total_counts += counts  
+    return total_counts  
+
+@timing_decorator
+def main():  
    # 读取文件内容
    text = read_file(testfilepath)    

@ -29,16 +36,19 @@ if __name__ == '__main__':
        start = i * chunk_size
        # 确保最后一个线程能够读取文件的末尾
        end = text_length if i == num_threads - 1 else (i + 1) * chunk_size
-        t = threading.Thread(target=count_words, args=(start, end, text, i, results))
+        t = threading.Thread(target=process_chunk, args=(start, end, text, i, results))
        threads.append(t)
        t.start()

    # 等待所有线程完成
    for t in threads: t.join()

-    # 合并结果
-    total_count = Counter()
-    for result in results:  total_count += result
+    # 合并计数  
+    total_counts = merge_counts(results)  

-    # 打印词频最高的10个单词    
-    print_word_freqs( total_count.most_common(10) )     
+    # 输出最高频的n个词
+    print_word_freqs( total_counts.most_common(10) )     
+
+    
+if __name__ == '__main__':
+    main()
--- a/计算设备/map-reduce/tf_92.py
+++ b/计算设备/map-reduce/tf_92.py
@ -1,10 +1,10 @@
-import re  
 import multiprocessing  
 from collections import Counter
 from cppy.cp_util import *  

-stopwords = get_stopwords()
-
+#
+# Multiprocess version
+#
 def process_chunk(chunk):  
    # 切词并过滤停用词       
    words = extract_str_words( chunk.lower() )
@ -17,7 +17,8 @@ def merge_counts(counts_list):
        total_counts += counts  
    return total_counts    

-if __name__ == '__main__':  
+@timing_decorator
+def main():  
    # 读取文件内容  
    content = read_file(testfilepath)    

@ -35,5 +36,9 @@ if __name__ == '__main__':
    total_counts = merge_counts(counts_list)  
  
    # 输出最高频的n个词
-    for word, count in total_counts.most_common(10):  
-        print(f"{word}-- {count}")
+    print_word_freqs( total_counts.most_common(10) )        
+
+
+if __name__ == '__main__':  
+    main()
+