zj3D 9 months ago
parent 51cb037429
commit e779762530

@ -0,0 +1,52 @@
import concurrent.futures
from collections import Counter
import cppy.cp_util as util
class WordFrequencyAgent:
    """Worker that counts word occurrences in its own share of a word list.

    The agent keeps the words it was given and, once
    ``compute_word_frequency`` has run, a ``Counter`` with their frequencies.
    """

    def __init__(self, words):
        """Store the words this agent is responsible for.

        Args:
            words: Iterable of hashable items to count (word strings in the
                script below).
        """
        self.words = words
        # Start with an empty result so get_word_frequency() never raises
        # AttributeError when the computation was skipped or failed.
        self.word_freq = Counter()

    def compute_word_frequency(self):
        """Count the stored words and keep the result on the agent."""
        self.word_freq = Counter(self.words)

    def get_word_frequency(self):
        """Return the Counter of the last computation (empty if none ran)."""
        return self.word_freq
# Split the word list into several parts and create one agent per part.
def create_agents(words, num_agents=4):
    """Build ``num_agents`` agents, each holding a strided slice of ``words``.

    Agent ``k`` receives ``words[k::num_agents]``, i.e. every
    ``num_agents``-th word starting at index ``k``.

    Args:
        words: Sliceable sequence of words.
        num_agents: Number of agents (and slices) to create.

    Returns:
        A list of ``WordFrequencyAgent`` objects, one per slice.
    """
    agents = []
    for offset in range(num_agents):
        share = words[offset::num_agents]
        agents.append(WordFrequencyAgent(share))
    return agents
def compute_all_word_frequencies(agents):
    """Run every agent's ``compute_word_frequency`` on a thread pool.

    Each agent stores its own result, so nothing is returned. If one agent's
    computation raises, the error is printed and the remaining agents are
    still processed.

    Args:
        agents: Iterable of objects exposing ``compute_word_frequency()`` and
            a sliceable ``words`` attribute (used only for error reporting).
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit one task per agent and remember which future belongs to
        # which agent so failures can be attributed.
        future_to_agent = {
            executor.submit(agent.compute_word_frequency): agent
            for agent in agents
        }
        for future in concurrent.futures.as_completed(future_to_agent):
            agent = future_to_agent[future]
            try:
                # result() is called only to surface an exception raised in
                # the worker; the frequencies themselves live on the agent.
                future.result()
            except Exception as exc:
                # Agents expose ``words`` (there is no ``text_chunk``
                # attribute); show the first few words to identify the agent.
                print(f'生成 {agent.words[:10]}... 的词频时出错: {exc}')
# Once all agents have finished, combine their word-frequency results.
def merge_word_frequencies(agents):
    """Add up the per-agent counters into a single ``Counter``.

    Args:
        agents: Iterable of objects exposing ``get_word_frequency()``.

    Returns:
        A new ``Counter`` holding the summed counts of all agents.
    """
    total = Counter()
    partial_counts = (agent.get_word_frequency() for agent in agents)
    for partial in partial_counts:
        total.update(partial)
    return total
if __name__ == '__main__':
    # Extract the words from the test text file.
    words = util.extract_file_words(util.testfilepath)

    # Create the agents, let them count, then merge their results.
    agents = create_agents(words)
    compute_all_word_frequencies(agents)
    merged_word_freq = merge_word_frequencies(agents)

    # Print the ten most frequent words, most common first.
    top_ten = merged_word_freq.most_common(10)
    for word, count in top_ten:
        print(word, '-', count)
Loading…
Cancel
Save