import concurrent.futures
from collections import Counter


class WordFrequencyAgent:
    """Agent that counts word occurrences within its own share of the words."""

    def __init__(self, words):
        """Store the words this agent is responsible for.

        Args:
            words: Sequence of hashable items (normally word strings).
        """
        self.words = words
        # Start with an empty result so get_word_frequency() stays usable
        # even when compute_word_frequency() has not run or has failed.
        self.word_freq = Counter()

    def compute_word_frequency(self):
        """Count the stored words and keep the result on the agent."""
        self.word_freq = Counter(self.words)

    def get_word_frequency(self):
        """Return the Counter produced by the last successful computation.

        The Counter is empty if no computation has succeeded yet.
        """
        return self.word_freq


def create_agents(words, num_agents=4):
    """Split ``words`` into ``num_agents`` parts and wrap each in an agent.

    The split is strided: agent ``i`` receives ``words[i::num_agents]``, so
    every word lands in exactly one agent.

    Args:
        words: Sliceable sequence of words.
        num_agents: Number of agents to create; must be at least 1.

    Returns:
        A list of ``num_agents`` WordFrequencyAgent instances.

    Raises:
        ValueError: If ``num_agents`` is less than 1 (no agent would exist
            to hold the words, so they would be silently dropped).
    """
    if num_agents < 1:
        raise ValueError(f"num_agents must be >= 1, got {num_agents}")
    text_chunks = [words[i::num_agents] for i in range(num_agents)]
    return [WordFrequencyAgent(chunk) for chunk in text_chunks]


def compute_all_word_frequencies(agents):
    """Run every agent's ``compute_word_frequency`` on a thread pool.

    Each agent stores its result on itself, so nothing is returned. An
    exception raised by one agent is reported on stdout and does not stop
    the remaining agents; the failed agent keeps its previous result.

    Args:
        agents: Iterable of WordFrequencyAgent instances.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Map each submitted future back to its agent for error reporting.
        future_to_agent = {
            executor.submit(agent.compute_word_frequency): agent
            for agent in agents
        }
        for future in concurrent.futures.as_completed(future_to_agent):
            agent = future_to_agent[future]
            try:
                # result() returns None here; it is called only so that an
                # exception raised inside the worker surfaces in this thread.
                future.result()
            except Exception as exc:
                print(f'生成 {agent.words[:10]}... 的词频时出错: {exc}')


def merge_word_frequencies(agents):
    """Combine the per-agent word counts into a single Counter.

    Call this after all agents have finished computing.

    Args:
        agents: Iterable of WordFrequencyAgent instances.

    Returns:
        A new Counter holding the summed counts of all agents.
    """
    merged_freq = Counter()
    for agent in agents:
        merged_freq.update(agent.get_word_frequency())
    return merged_freq


if __name__ == '__main__':
    # Only this script section needs the project-local helper module, so it
    # is imported here; the definitions above stay importable on their own.
    import cppy.cp_util as util

    words = util.extract_file_words(util.testfilepath)  # extract words from the text
    agents = create_agents(words)  # create the agents
    compute_all_word_frequencies(agents)  # count in parallel
    merged_word_freq = merge_word_frequencies(agents)  # merge the results
    for (w, c) in merged_word_freq.most_common(10):  # print the top 10, most frequent first
        print(w, '-', c)