import queue
import threading

from cppy.cp_util import *

# Open question for the reader: could the workers simply share one global
# dictionary instead of exchanging data through queues?


def process_words(word_space, freq_space, stopwords, timeout=1):
    """Consume words from ``word_space`` and publish this worker's counts.

    Each call keeps its own local frequency dict, so the counts need no
    locking between workers. When no word arrives within ``timeout``
    seconds, the dict is put on ``freq_space`` and the function returns.

    Args:
        word_space: ``queue.Queue`` holding the words still to be counted.
        freq_space: ``queue.Queue`` that receives one frequency dict per call.
        stopwords: Passed straight through to ``count_word``.
        timeout: Seconds to wait for a word before treating the queue as
            drained.
    """
    word_freqs = {}
    while True:
        try:
            word = word_space.get(timeout=timeout)
        except queue.Empty:
            # Nothing arrived within the timeout: treat the input as finished.
            break
        # The return value is ignored, so this relies on count_word updating
        # word_freqs in place (helper comes from cppy.cp_util -- confirm).
        count_word(word, word_freqs, stopwords)
    freq_space.put(word_freqs)


def start_threads(word_space, freq_space, stopwords, num_workers=5):
    """Create and start the worker threads.

    Args:
        word_space: Queue of input words shared by all workers.
        freq_space: Queue collecting each worker's frequency dict.
        stopwords: Forwarded to every worker.
        num_workers: Number of threads to start.

    Returns:
        The list of started ``threading.Thread`` objects, so the caller can
        ``join`` them.
    """
    workers = []
    for _ in range(num_workers):
        worker = threading.Thread(
            target=process_words,
            args=(word_space, freq_space, stopwords),
        )
        worker.start()
        workers.append(worker)
    return workers


def _merge_freqs(freq_space):
    """Sum every per-worker frequency dict waiting in ``freq_space``."""
    merged = {}
    # Checking empty() is only reliable because the caller has already
    # joined all workers, so no further put() can happen.
    while not freq_space.empty():
        for key, count in freq_space.get().items():
            merged[key] = merged.get(key, 0) + count
    return merged


def main():
    """Count word frequencies with a pool of threads and print the result."""
    stopwords = get_stopwords()
    word_space = queue.Queue()
    freq_space = queue.Queue()

    # Push all input words into word_space before any worker starts.
    for word in extract_file_words(testfilepath):
        word_space.put(word)

    # Create and start the worker threads.
    workers = start_threads(word_space, freq_space, stopwords)

    # Wait for every worker to finish.
    for worker in workers:
        worker.join()

    # Merge the partial results, then print them.
    word_freqs = _merge_freqs(freq_space)
    print_word_freqs(sort_dict(word_freqs))


if __name__ == "__main__":
    main()