import threading
from collections import Counter
from cppy.cp_util import *


#
# Multithreaded word-frequency count.
#
# NOTE(review): threads do not run Python bytecode in parallel under the
# GIL, so this is a concurrency demo rather than a true speedup for this
# CPU-bound tokenization work.
#
def process_chunk(start, end, text, result_index, results):
    """Tokenize text[start:end], filter stop words, and store the
    resulting Counter in results[result_index].

    Writing to a pre-sized list slot per thread avoids any need for a lock.
    """
    words = extract_str_words(text[start:end])
    results[result_index] = Counter(words)


def merge_counts(counts_list):
    """Merge an iterable of Counter objects into a single total Counter."""
    total_counts = Counter()
    for counts in counts_list:
        total_counts += counts
    return total_counts


@timing_decorator
def main():
    """Count word frequencies across `num_threads` chunks of the test file
    and print the 10 most common words."""
    text = read_file(testfilepath)

    num_threads = 4
    text_length = len(text)
    chunk_size = text_length // num_threads

    # Compute chunk boundaries, snapping each interior split point forward
    # to the next whitespace character. The original fixed-offset split
    # (i * chunk_size) could cut a word in half at a boundary, so the two
    # fragments were each counted as a (wrong) separate word.
    boundaries = [0]
    for i in range(1, num_threads):
        pos = i * chunk_size
        while pos < text_length and not text[pos].isspace():
            pos += 1
        boundaries.append(pos)
    boundaries.append(text_length)  # last chunk always reaches end of file

    # One result slot per thread; each thread writes only its own slot.
    results = [None] * num_threads
    threads = []

    for i in range(num_threads):
        t = threading.Thread(
            target=process_chunk,
            args=(boundaries[i], boundaries[i + 1], text, i, results),
        )
        threads.append(t)
        t.start()

    # Wait for all threads to finish before merging.
    for t in threads:
        t.join()

    total_counts = merge_counts(results)

    # Print the n most frequent words.
    print_word_freqs(total_counts.most_common(10))


if __name__ == '__main__':
    main()