import concurrent.futures  
from collections import Counter  
import cppy.cp_util as util

'''
concurrent.futures模块为Python中的并发编程提供了一个统一接口,
这个模块隐藏了低层次的线程和进程创建、同步和清理的细节,提供了一个更高层次的API来处理并发任务。
当前版本推荐它与asyncio模块结合使用完成Python中的各种异步编程任务。
'''
stop_words = util.get_stopwords()

class WordFrequencyAgent:  
    def __init__(self, words):          
        self.words = words  
  
    def compute_word_frequency(self):
        words = [ w for w in self.words if ( not w in stop_words ) and len(w) >= 3 ]          
        self.word_freq = Counter( words)  
  
    def get_word_frequency(self):  
        return self.word_freq

    
# 将文本分割成多个部分，并为每个部分创建一个Agent
def create_agents( words ):          
    return [ WordFrequencyAgent(chunk) for chunk in words ]    

def compute_all_word_frequencies(agents):  
    with concurrent.futures.ThreadPoolExecutor() as executor:  
        # 使用线程池来并行计算词频  
        future_to_agent = {executor.submit(agent.compute_word_frequency): agent for agent in agents}  
        for future in concurrent.futures.as_completed(future_to_agent):  
            agent = future_to_agent[future]  
            data = future.result()   # 词频被保存在agent中

# 所有Agent计算完成后，合并它们的词频结果
def merge_word_frequencies(agents):  
    merged_freq = Counter()  
    for agent in agents:  
        merged_freq.update(agent.get_word_frequency())  
    return merged_freq  

@util.timing_decorator
def main():   
    words = util.get_chunks(util.testfilepath)
    agents = create_agents(words)  # 创建代理
    compute_all_word_frequencies(agents)  # 计算
    merged_word_freq = merge_word_frequencies(agents)   # 合并结果    
    util.print_word_freqs(merged_word_freq.most_common(10))  # 排序输出  

if __name__ == '__main__':       
  main()