"""Word-frequency counting with agents driven by ``concurrent.futures``.

The ``concurrent.futures`` module provides a unified interface for concurrent
programming in Python. It hides the low-level details of creating,
synchronizing and cleaning up threads and processes, and offers a
higher-level API for handling concurrent tasks. In current Python versions
it is recommended to combine it with the ``asyncio`` module for the various
asynchronous programming tasks.
"""
import concurrent.futures
from collections import Counter

import cppy.cp_util as util

# Stop words are loaded once at import time and shared by every agent.
stop_words = util.get_stopwords()


class WordFrequencyAgent:
    """Count the word frequencies of one chunk of words.

    Attributes:
        words: the iterable of words this agent is responsible for.
        word_freq: Counter holding the result; empty until
            ``compute_word_frequency`` has run.
    """

    def __init__(self, words):
        self.words = words
        # Start with an empty Counter so that get_word_frequency() is safe
        # to call (and merge) even before compute_word_frequency() has run.
        self.word_freq = Counter()

    def compute_word_frequency(self):
        """Count the words that are not stop words and have length >= 3.

        The result is stored in ``self.word_freq``; nothing is returned.
        """
        words = [
            w for w in self.words
            if w not in stop_words and len(w) >= 3
        ]
        self.word_freq = Counter(words)

    def get_word_frequency(self):
        """Return the Counter produced by the last computation."""
        return self.word_freq


def create_agents(words):
    """Create one WordFrequencyAgent for each chunk in ``words``.

    Args:
        words: an iterable of chunks; each chunk is passed unchanged to a
            new agent (presumably a list of words -- confirm against
            ``util.get_chunks``).

    Returns:
        A list of agents, one per chunk, in the same order.
    """
    return [WordFrequencyAgent(chunk) for chunk in words]


def compute_all_word_frequencies(agents):
    """Run every agent's ``compute_word_frequency`` on a thread pool.

    Each agent stores its own result, so nothing is returned. Any exception
    raised inside a worker is re-raised here through ``future.result()``.

    NOTE(review): counting words is CPU-bound work; on standard (GIL)
    CPython builds threads will likely not run it in parallel -- a process
    pool may be needed for a real speed-up.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(agent.compute_word_frequency) for agent in agents
        ]
        for future in concurrent.futures.as_completed(futures):
            # The frequencies live in the agent; result() is called only so
            # that a worker exception is not silently dropped.
            future.result()


def merge_word_frequencies(agents):
    """Merge the word frequencies of all agents into a single Counter.

    Intended to be called after all agents have finished computing.
    """
    merged_freq = Counter()
    for agent in agents:
        merged_freq.update(agent.get_word_frequency())
    return merged_freq


@util.timing_decorator
def main():
    """Chunk the test file, count per chunk, merge, and print the top 10."""
    words = util.get_chunks(util.testfilepath)
    agents = create_agents(words)  # create the agents
    compute_all_word_frequencies(agents)  # compute
    merged_word_freq = merge_word_frequencies(agents)  # merge the results
    util.print_word_freqs(merged_word_freq.most_common(10))  # sorted output


if __name__ == '__main__':
    main()