You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

37 lines
1.2 KiB

9 months ago
import threading, queue
9 months ago
from cppy.cp_util import *
class WordFrequencyCounter:
    """Count word frequencies using a pool of worker threads.

    Words are loaded into a shared queue (``word_space``). Each worker
    drains words from that queue into a private dict and posts the dict to
    a second queue (``freq_space``). ``run`` merges those partial counts
    and prints the result.
    """

    def __init__(self, input_file):
        """Fill the word queue with the words extracted from *input_file*.

        ``extract_file_words`` is not defined in this class; presumably it
        is provided by the ``cppy.cp_util`` star import -- its filtering
        rules are not visible here.
        """
        self.word_space = queue.Queue()
        self.freq_space = queue.Queue()
        for word in extract_file_words(input_file):
            self.word_space.put(word)

    def process_words(self):
        """Worker body: count words taken from ``word_space``.

        Pulls words until the queue is empty, then puts one dict mapping
        word -> count onto ``freq_space`` (an empty dict if this worker got
        no words).
        """
        word_freqs = {}
        while True:
            # Non-blocking get instead of "check empty(), then get with a
            # timeout": another worker may take the last word between the
            # check and the get, which would stall this thread for the
            # whole timeout before it noticed the queue was empty.
            try:
                word = self.word_space.get_nowait()
            except queue.Empty:
                break
            word_freqs[word] = word_freqs.get(word, 0) + 1
        self.freq_space.put(word_freqs)

    def run(self, num_workers=5):
        """Count all queued words concurrently and print the merged result.

        Args:
            num_workers: Number of worker threads to start (default 5).

        Raises:
            ValueError: If *num_workers* is less than 1, since no words
                would be processed.
        """
        if num_workers < 1:
            raise ValueError("num_workers must be at least 1")

        workers = [
            threading.Thread(target=self.process_words)
            for _ in range(num_workers)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # All workers have finished, so every partial result is already
        # sitting in freq_space; drain it and sum the counts per word.
        word_freqs = {}
        while True:
            try:
                freqs = self.freq_space.get_nowait()
            except queue.Empty:
                break
            for word, count in freqs.items():
                word_freqs[word] = word_freqs.get(word, 0) + count

        # print_word_freqs / sort_dict are external helpers (presumably
        # from cppy.cp_util); their output format is not visible here.
        print_word_freqs(sort_dict(word_freqs))
if __name__ == '__main__':
    # Script entry point: build a counter and run it in one expression.
    # `testfilepath` is not defined in this file; presumably it is provided
    # by the cppy.cp_util star import.
    WordFrequencyCounter(testfilepath).run()