You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

20 lines
451 B

9 months ago
from cppy.cp_util import *
8 months ago
#
9 months ago
# Generator
8 months ago
#
def non_stop_words(testfilepath):
    """Lazily yield the words of a file that are not stop words.

    The stop-word collection comes from ``get_stopwords()``, the file
    content from ``read_file(testfilepath)``, and the word sequence from
    ``re_split()`` applied to that content. Nothing is loaded until the
    generator is first advanced.

    Args:
        testfilepath: Path handed to ``read_file``.

    Yields:
        Each item produced by ``re_split`` that is not contained in the
        stop-word collection, in the order ``re_split`` returned them.
    """
    excluded = get_stopwords()
    text = read_file(testfilepath)
    # Emit one non-stop word at a time instead of building a full list.
    yield from (
        candidate for candidate in re_split(text) if candidate not in excluded
    )
9 months ago
8 months ago
# Tally how many times the generator yields each non-stop word.
freqs = {}
for word in non_stop_words(testfilepath):
    if word in freqs:
        freqs[word] += 1
    else:
        freqs[word] = 1

# Hand the tallies to the shared helpers for ordering and output.
data = sort_dict(freqs)
print_word_freqs(data)