print统一

master
zj3D 9 months ago
parent 5a619bfbc1
commit b04a404b3a

@ -16,6 +16,4 @@ def load_plugins():
load_plugins() load_plugins()
word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath )) word_freqs = tffreqs.top25(tfwords.extract_words( testfilepath ))
for (w, c) in word_freqs: print_word_freqs(word_freqs)
print(w, '-', c)

@ -22,7 +22,7 @@ class WordFrequencyController():
#定义具体的享元类 #定义具体的享元类
class ConcreteWordFrequencyController(WordFrequencyController): class ConcreteWordFrequencyController(WordFrequencyController):
def __init__(self, controllertype,filepath): def __init__(self, controllertype,filepath):
self.word_list = extract_words(filepath) self.word_list = extract_file_words(filepath)
self.word_freq = get_frequencies(self.word_list) self.word_freq = get_frequencies(self.word_list)
self.word_freq = sort_dict(self.word_freq) self.word_freq = sort_dict(self.word_freq)
def print_word_freqs(self, number): def print_word_freqs(self, number):
@ -51,7 +51,7 @@ if __name__ == "__main__":
factory = WordFrequencyControllerFactory() factory = WordFrequencyControllerFactory()
while True: while True:
try: try:
number = input("请输入需要显示词频前几的单词") number = input("请输入需要显示词频前几的单词: ")
process_command(factory, number) process_command(factory, number)
except EOFError: except EOFError:
break break

@ -1,4 +1,4 @@
import sys, re, operator, string, inspect import re, operator, string
from cppy.cp_util import * from cppy.cp_util import *
# #
@ -30,12 +30,7 @@ def remove_stop_words(word_list):
def frequencies(word_list): def frequencies(word_list):
if type(word_list) is not list or word_list == []: return {} if type(word_list) is not list or word_list == []: return {}
word_freqs = {} word_freqs = get_frequencies( word_list )
for w in word_list:
if w in word_freqs:
word_freqs[w] += 1
else:
word_freqs[w] = 1
return word_freqs return word_freqs
def sort(word_freq): def sort(word_freq):

@ -6,9 +6,8 @@ from cppy.cp_util import *
stopwords = get_stopwords() stopwords = get_stopwords()
def process_chunk(chunk): def process_chunk(chunk):
# 切词并过滤停用词 # 切词并过滤停用词
words = re.findall(r'\w+', chunk.lower()) words = extract_str_words( chunk.lower() )
words = [ word for word in words if word not in stopwords and len(word) > 2]
return Counter(words) return Counter(words)
def merge_counts(counts_list): def merge_counts(counts_list):

@ -20,8 +20,7 @@ def calculate_word_frequency(file_path):
# 测试函数 # 测试函数
top_10_words = calculate_word_frequency(testfilepath) top_10_words = calculate_word_frequency(testfilepath)
for word, freq in top_10_words: print_word_freqs(top_10_words)
print(f"{word}: {freq}")
''' '''
python 提供了一种缓存调用函数的机制 python 提供了一种缓存调用函数的机制

@ -40,6 +40,5 @@ if __name__ == '__main__':
total_count = Counter() total_count = Counter()
for result in results: total_count += result for result in results: total_count += result
# 打印词频最高的10个单词 # 打印词频最高的10个单词
for w,c in total_count.most_common(10): print_word_freqs( total_count.most_common(10) )
print(w, '--',c)

@ -15,9 +15,9 @@ def word_frequency( top_n=10 ):
word_counts[word] += 1 word_counts[word] += 1
# 输出所有词的频率最高的n个词 # 输出所有词的频率最高的n个词
most_common = word_counts.most_common(top_n) most_common = word_counts.most_common(top_n)
for w, count in most_common: util.print_word_freqs( most_common )
print(f"{w} - {count}")
return result return result
return wrapper return wrapper

Loading…
Cancel
Save