You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

47 lines
1.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import concurrent.futures
from collections import Counter
import cppy.cp_util as util
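# Of limited value: this is only a demonstration of multithreading, showing that loose coupling is not as good as message-driven components.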
class WordFrequencyAgent:
    """Count word occurrences for one chunk of a larger word list.

    Nothing is counted on construction: call ``compute_word_frequency``
    first, then read the tally with ``get_word_frequency``.
    """

    def __init__(self, words):
        """Store the chunk of words this agent is responsible for.

        Args:
            words: Iterable of hashable items (the words) to count.
        """
        self.words = words
        # Start with an empty tally so get_word_frequency() is safe to call
        # before compute_word_frequency() instead of raising AttributeError.
        self.word_freq = Counter()

    def compute_word_frequency(self):
        """Count the stored words and keep the tally on the agent."""
        self.word_freq = Counter(self.words)

    def get_word_frequency(self):
        """Return the ``Counter`` of the last computation (empty if none ran)."""
        return self.word_freq
# Split the words into several parts and create one agent per part.
def create_agents(words, num_agents=4):
    """Distribute ``words`` round-robin over ``num_agents`` new agents.

    Agent ``i`` receives ``words[i::num_agents]``, so every word lands in
    exactly one chunk. When there are fewer words than agents, the surplus
    agents simply receive empty chunks.

    Args:
        words: Sliceable sequence of words (for example a list).
        num_agents: Number of agents to create; must be at least 1.

    Returns:
        A list of ``num_agents`` ``WordFrequencyAgent`` objects.

    Raises:
        ValueError: If ``num_agents`` is less than 1.
    """
    if num_agents < 1:
        # range(num_agents) would be empty, so no agent would be created and
        # every word would silently vanish from the merged result.
        raise ValueError("num_agents must be at least 1, got %r" % (num_agents,))
    return [WordFrequencyAgent(words[i::num_agents]) for i in range(num_agents)]
def compute_all_word_frequencies(agents):
    """Run ``compute_word_frequency`` on every agent using a thread pool.

    Each agent keeps its own result, so nothing is returned. All submitted
    tasks have finished by the time this function returns.

    Args:
        agents: Iterable of objects exposing ``compute_word_frequency()``.

    Raises:
        Exception: Whatever an agent's ``compute_word_frequency`` raised is
            re-raised here through ``Future.result()``.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Fan the per-agent counting out over the pool's worker threads.
        futures = [
            executor.submit(agent.compute_word_frequency) for agent in agents
        ]
        for future in concurrent.futures.as_completed(futures):
            # The tally is stored on the agent itself; result() is called only
            # so that an exception raised in a worker surfaces in the caller.
            future.result()
# Once every agent has finished counting, merge their word-frequency results.
def merge_word_frequencies(agents):
    """Combine the tallies held by ``agents`` into a single ``Counter``.

    Args:
        agents: Iterable of objects exposing ``get_word_frequency()``.

    Returns:
        A new ``Counter`` updated with each agent's tally in turn.
    """
    combined = Counter()
    for partial in (agent.get_word_frequency() for agent in agents):
        combined.update(partial)
    return combined
if __name__ == '__main__':
    # Extract the words from the text at util.testfilepath.
    word_list = util.extract_file_words(util.testfilepath)
    # Create the agents.
    workers = create_agents(word_list)
    # Compute the per-agent word frequencies.
    compute_all_word_frequencies(workers)
    # Merge the per-agent results.
    totals = merge_word_frequencies(workers)
    # Output the ten most common words, sorted by count.
    util.print_word_freqs(totals.most_common(10))