handler = handler_class() # 实例化处理器类
return handler.handle(*args) # 调用处理方法
print(f"No handler found for task type: {task_type}")
if __name__ == '__main__':

from cppy.cp_util import *
from functools import reduce
# map - reduce
def process_chunk(chunk):
    """Count the words of one chunk, skipping stop words and short words.

    A word is kept when it is not contained in the collection returned by
    ``get_stopwords()`` and its length is at least three.

    Args:
        chunk: Iterable of words (each item must support ``len``).

    Returns:
        A ``Counter`` mapping every kept word to its number of occurrences.
    """
    excluded = get_stopwords()
    tally = Counter()
    for token in chunk:
        # Same test order as before: stop-word membership first, then length.
        if token in excluded or len(token) < 3:
            continue
        tally[token] += 1
    return tally
def merge_counts(count1, count2):
    """Combine two partial tallies into one with the ``+`` operator.

    Args:
        count1: First tally.
        count2: Second tally.

    Returns:
        The result of ``count1 + count2``.
    """
    return count1 + count2
def main():
    """Count the word frequencies of the test file with a map/reduce pipeline.

    The file at ``testfilepath`` is read and split into words by
    ``re_split``, the word list is cut into fixed-size chunks, every chunk is
    counted by ``process_chunk`` and the partial results are folded together
    with ``merge_counts``.

    Returns:
        The merged ``Counter`` for the whole file; an empty ``Counter`` when
        the file yields no words.
    """
    # Read the file content as a list of words.
    content = re_split(read_file(testfilepath))
    # Cut the word list into chunks; each chunk is one unit of work.
    chunk_size = 1000  # adjust to the size of the input if needed
    chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
    # Map: count every chunk independently.
    counts_list = map(process_chunk, chunks)
    # Reduce: seed the fold with an empty Counter so that an input without any
    # chunk yields an empty result instead of raising TypeError.
    total_counts = reduce(merge_counts, counts_list, Counter())
    return total_counts
if __name__ == '__main__':

# 多线程
def process_chunk(chunk):
    """Count the words of one chunk, skipping stop words and short words.

    ``stop_words`` is not defined in this function; it is read from the
    enclosing module scope.

    Args:
        chunk: Iterable of words (each item must support ``len``).

    Returns:
        A ``Counter`` of the words that are not in ``stop_words`` and whose
        length is at least three.
    """
    return Counter(
        token
        for token in chunk
        if token not in stop_words and len(token) >= 3
    )
def main():
    """Count the words of the test file chunk by chunk on a thread pool.

    Returns:
        A list holding the result of ``process_chunk`` for every chunk, in
        chunk order; an empty list when the file yields no chunks.
    """
    # Read the data and cut it into groups of 1000 words.
    chunks = get_chunks(testfilepath, 1000)
    if not chunks:
        # A pool cannot be created with zero workers, and there is nothing
        # to count anyway.
        return []
    # One thread per chunk (an arbitrary choice kept from the original code).
    # NOTE(review): assumes ThreadPool is multiprocessing.pool.ThreadPool,
    # which supports the context-manager protocol - confirm the import.
    with ThreadPool(len(chunks)) as pool:
        # map() blocks until every chunk is processed, so leaving the
        # ``with`` block afterwards only releases the worker threads.
        counts_list = pool.map(process_chunk, chunks)
    return counts_list

# 多进程
def process_chunk(chunk):
    """Tally the words of one chunk after filtering.

    Words found in the module-level ``stop_words`` collection and words
    shorter than three characters are ignored.

    Args:
        chunk: Iterable of words (each item must support ``len``).

    Returns:
        A ``Counter`` mapping every remaining word to its occurrences.
    """
    tally = Counter()
    for token in chunk:
        # Stop-word membership is tested before the length, as before.
        if token in stop_words or len(token) < 3:
            continue
        tally[token] += 1
    return tally
def merge_counts(counts_list):
    """Merge several ``Counter`` objects into a single total.

    Args:
        counts_list: Iterable of ``Counter`` objects to add up.

    Returns:
        A new ``Counter`` with the summed counts; empty when ``counts_list``
        is empty.
    """
    total_counts = Counter()
    for counts in counts_list:
        total_counts.update(counts)
    return total_counts
# Read the file and split its words into chunks of 1000; each chunk is meant to be handled by one process.
chunks = get_chunks(testfilepath,1000)
# Create a process pool sized to multiprocessing.cpu_count() to work on the chunks.
# NOTE(review): the enclosing `def` header and the statements that use and close `pool` are not visible here - confirm against the full file.
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

# Stop-word collection fetched once at module level; WordFrequencyAgent.compute_word_frequency filters against it.
stop_words = util.get_stopwords()
class WordFrequencyAgent:
    """Agent that counts the word frequencies of one chunk of words."""

    def __init__(self, words):
        """Store the words this agent is responsible for.

        Args:
            words: Iterable of words (each item must support ``len``).
        """
        self.words = words
        # Start with an empty tally so get_word_frequency() is safe to call
        # even before compute_word_frequency() has run.
        self.word_freq = Counter()

    def compute_word_frequency(self):
        """Count the agent's words and keep the tally in ``self.word_freq``.

        Words contained in the module-level ``stop_words`` and words shorter
        than three characters are skipped.
        """
        self.word_freq = Counter(
            w for w in self.words if w not in stop_words and len(w) >= 3
        )

    def get_word_frequency(self):
        """Return the tally computed so far (empty before any computation)."""
        return self.word_freq
# Create one agent for every part of the split text.
def create_agents(words):
    """Wrap every chunk in its own ``WordFrequencyAgent``.

    Args:
        words: Iterable of chunks; each chunk is handed to one agent.

    Returns:
        A list of agents in the same order as the chunks.
    """
    agents = []
    for chunk in words:
        agents.append(WordFrequencyAgent(chunk))
    return agents
def compute_all_word_frequencies(agents):
    """Run ``compute_word_frequency`` of every agent on a thread pool.

    Each agent keeps its tally on itself, so nothing is returned. The call
    returns only after all agents have finished; an exception raised by an
    agent is re-raised here.

    Args:
        agents: Iterable of objects exposing ``compute_word_frequency()``.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit one task per agent and remember which future belongs to it.
        future_to_agent = {
            executor.submit(agent.compute_word_frequency): agent
            for agent in agents
        }
        for future in concurrent.futures.as_completed(future_to_agent):
            # result() surfaces worker exceptions; the word frequencies
            # themselves are stored on the agent.
            future.result()
# Once every agent has finished computing, merge their word-frequency results.
def merge_word_frequencies(agents):
    """Add up the tallies of all agents.

    Args:
        agents: Iterable of objects exposing ``get_word_frequency()``.

    Returns:
        A new ``Counter`` with the summed counts; empty when there are no
        agents.
    """
    merged_freq = Counter()
    for agent in agents:
        merged_freq.update(agent.get_word_frequency())
    return merged_freq
if __name__ == '__main__':
def main():
    """Count the word frequencies of the test file with agents and print the top ten."""
    # Create one agent per chunk of the test file.
    agents = create_agents(util.get_chunks(util.testfilepath))
    # Let every agent compute its own tally.
    compute_all_word_frequencies(agents)
    # Merge the tallies and print the ten most common words.
    top_ten = merge_word_frequencies(agents).most_common(10)
    util.print_word_freqs(top_ten)
if __name__ == '__main__':

# Split the test file into words and pass every non-stop-word to the subject.
wordlist = re_split(read_file(testfilepath))
for word in wordlist:
    if word in stopwords:
        continue
    subject.notify(word)  # trigger
# Fetch the top-N word frequencies from the observer.
top_words = observer.get_top_n(top_n)

IBook 是一个接口,定义了书籍应有的行为,比如获取标题和作者。
NovelBook 是一个具体书籍类,实现了 IBook 接口。
BookCategory 是一个书籍分类类,它可以包含多个书籍实例。
DisplayPlatform 是一个抽象展示平台类,定义了如何展示书籍。
WebDisplayPlatform 和 MobileDisplayPlatform 是具体展示平台类,分别实现了 DisplayPlatform 接口,以提供不同的展示方式。
BookShop 是一个桥接类,它将书籍分类与展示平台连接起来,通过 show_books 方法可以展示分类中的所有书籍。

return data
def get_chunks(file_path=testfilepath, chunk_size=1000):
    """Read a file, split it into words and group the words into chunks.

    Args:
        file_path: File to read; defaults to ``testfilepath``.
        chunk_size: Number of words per chunk (the last chunk may be
            shorter); adjust to the size of the input.

    Returns:
        A list of consecutive slices of the word list.
    """
    words = re_split(read_file(file_path))
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(words[start:start + chunk_size])
    return chunks
def extract_file_words(path_to_file):
word_list = re_split( read_file(path_to_file) )
stop_words = get_stopwords()
