You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
import threading, queue
|
|
|
|
from cppy.cp_util import *
|
|
|
|
|
|
|
|
class WordFrequencyCounter:
    """Count word frequencies using worker threads that share two queues.

    ``word_space`` holds the words that still have to be counted.
    ``freq_space`` receives one partial ``{word: count}`` dict from every
    worker; ``run`` merges those partial dicts into the final result.
    """

    def __init__(self, input_file):
        """Create both queues and enqueue every word of *input_file*.

        Args:
            input_file: Handed unchanged to ``extract_file_words``; each
                item that helper yields is put on ``word_space``.
        """
        self.word_space = queue.Queue()
        self.freq_space = queue.Queue()
        for word in extract_file_words(input_file):
            self.word_space.put(word)

    def process_words(self):
        """Drain ``word_space`` and publish this worker's partial counts.

        Words are taken until the queue reports empty; the resulting
        ``{word: count}`` dict is then put on ``freq_space``. A dict is
        published even when the worker obtained no words at all.
        """
        word_freqs = {}
        while True:
            # A non-blocking get is the only race-free emptiness test when
            # several workers share the queue: checking ``empty()`` first
            # and then blocking in ``get(timeout=...)`` lets a worker stall
            # for the whole timeout after a sibling takes the last word.
            # This relies on all words being enqueued before the workers
            # start, which is what ``__init__`` does.
            try:
                word = self.word_space.get_nowait()
            except queue.Empty:
                break
            word_freqs[word] = word_freqs.get(word, 0) + 1
        self.freq_space.put(word_freqs)

    def run(self, num_workers=5):
        """Count all queued words concurrently and print the result.

        Starts the worker threads, waits for all of them to finish, sums
        their partial dicts from ``freq_space``, and passes the merged dict
        through ``sort_dict`` to ``print_word_freqs``.

        Args:
            num_workers: Number of threads running ``process_words``.
                Defaults to 5.
        """
        workers = [
            threading.Thread(target=self.process_words)
            for _ in range(num_workers)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # Every worker has been joined, so all partial results are already
        # on ``freq_space``; drain it without blocking.
        word_freqs = {}
        while True:
            try:
                freqs = self.freq_space.get_nowait()
            except queue.Empty:
                break
            for word, count in freqs.items():
                word_freqs[word] = word_freqs.get(word, 0) + count

        print_word_freqs(sort_dict(word_freqs))
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build a counter over `testfilepath` and run it.
    # NOTE(review): `testfilepath` is not defined in this file; presumably
    # it comes from the `cppy.cp_util` star import - confirm.
    WordFrequencyCounter(testfilepath).run()
|