You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
import threading, queue
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
|
|
|
# Worker: process words taken from the shared queue
|
|
|
|
def process_words(word_space, freq_space, stopwords, timeout=1):
    """Drain ``word_space`` and publish this worker's word counts.

    Words are taken from ``word_space`` one at a time and handed to
    ``count_word`` together with a dictionary private to this call. When no
    word arrives within ``timeout`` seconds the queue is treated as exhausted,
    the loop ends, and the dictionary is put on ``freq_space``.

    Args:
        word_space: ``queue.Queue`` holding the words still to be processed.
        freq_space: ``queue.Queue`` that receives the resulting dictionary.
        stopwords: Passed through unchanged to ``count_word``.
        timeout: Seconds to wait for another word before giving up.
            Defaults to 1, the value that used to be hard-coded.
    """
    word_freqs = {}
    while True:
        try:
            word = word_space.get(timeout=timeout)
        except queue.Empty:
            # Nothing arrived within the timeout: stop consuming.
            break
        # NOTE(review): count_word is not defined in this file (presumably it
        # comes from the cppy.cp_util star import and updates word_freqs in
        # place) -- confirm against that module.
        count_word(word, word_freqs, stopwords)
    freq_space.put(word_freqs)
|
|
|
|
|
|
|
|
# Create and start the worker threads
|
|
|
|
def start_threads(word_space, freq_space, stopwords, num_workers=5):
    """Start worker threads running ``process_words`` and return them.

    Each thread is created with ``process_words`` as its target and receives
    the same three arguments. Threads are started immediately; joining them
    is left to the caller.

    Args:
        word_space: Queue of words, forwarded to every worker.
        freq_space: Queue for results, forwarded to every worker.
        stopwords: Forwarded to every worker.
        num_workers: Number of threads to start. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        A list of the started ``threading.Thread`` objects, in creation order.
    """
    workers = []
    for _ in range(num_workers):
        worker = threading.Thread(
            target=process_words,
            args=(word_space, freq_space, stopwords),
        )
        worker.start()
        workers.append(worker)
    return workers
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: count word frequencies of the test file using
    # worker threads that communicate through two queues.
    # NOTE(review): get_stopwords, extract_file_words, testfilepath, sort_dict
    # and print_word_freqs are not defined in this file -- presumably they
    # come from the cppy.cp_util star import; confirm.
    stopwords = get_stopwords()
    word_space = queue.Queue()
    freq_space = queue.Queue()

    # Push every word into word_space before any worker is started.
    for token in extract_file_words(testfilepath):
        word_space.put(token)

    # Create and start the threads.
    workers = start_threads(word_space, freq_space, stopwords)

    # Wait for all threads to finish.
    for thread in workers:
        thread.join()

    # Merge the per-thread results into a single dictionary.
    word_freqs = {}
    while not freq_space.empty():
        partial = freq_space.get()
        for term, count in partial.items():
            word_freqs[term] = word_freqs.get(term, 0) + count

    # Print the sorted result.
    print_word_freqs(sort_dict(word_freqs))
|