You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.3 KiB

import threading, queue
from cppy.cp_util import *
# Process words
def process_words(word_space, freq_space, stopwords):
    """Consume words from ``word_space`` and publish one tally to ``freq_space``.

    Words are taken from ``word_space`` one at a time and handed to
    ``count_word`` together with a dictionary private to this call and the
    ``stopwords`` argument. When no word arrives within one second the loop
    ends and that dictionary is put on ``freq_space``.

    Args:
        word_space: Queue supplying the words to process.
        freq_space: Queue that receives this call's dictionary.
        stopwords: Passed through unchanged to ``count_word``.
    """
    local_counts = {}
    draining = True
    while draining:
        try:
            token = word_space.get(timeout=1)
        except queue.Empty:
            # Nothing arrived for a full second: treat the input as exhausted.
            draining = False
        else:
            # count_word's return value is not used, so it presumably
            # updates local_counts in place (helper lives in cppy.cp_util).
            count_word(token, local_counts, stopwords)
    freq_space.put(local_counts)
# Create and start the worker threads
def start_threads(word_space, freq_space, stopwords, num_workers=5):
    """Spawn worker threads that each run ``process_words``.

    Args:
        word_space: Queue of words for the workers to consume.
        freq_space: Queue on which each worker publishes its dictionary.
        stopwords: Forwarded unchanged to every worker.
        num_workers: Number of threads to start. Defaults to 5, the value
            that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The list of started ``threading.Thread`` objects, in start order,
        so the caller can ``join`` them.
    """
    workers = []
    for _ in range(num_workers):
        worker = threading.Thread(
            target=process_words,
            args=(word_space, freq_space, stopwords),
        )
        worker.start()
        workers.append(worker)
    return workers
if __name__ == "__main__":
    stopwords = get_stopwords()
    word_space, freq_space = queue.Queue(), queue.Queue()

    # Push every word of the test file into word_space before any worker starts
    for token in extract_file_words(testfilepath):
        word_space.put(token)

    # Create and start the worker threads
    workers = start_threads(word_space, freq_space, stopwords)

    # Wait for all threads to finish
    for thread in workers:
        thread.join()

    # Merge the per-thread results into a single dictionary
    word_freqs = {}
    while not freq_space.empty():
        for term, count in freq_space.get().items():
            word_freqs[term] = word_freqs.get(term, 0) + count

    # Print the result (ordering is delegated to sort_dict)
    ranked = sort_dict(word_freqs)
    print_word_freqs(ranked)