# Forked from p46318075/CodePattern (20 lines, 451 B).
from cppy.cp_util import *
|
|
|
|
#
# Generator
#
|
|
def non_stop_words(testfilepath):
    """Lazily yield the words of a text file that are not stop words.

    Args:
        testfilepath: Location of the text file; passed straight to
            ``read_file``.

    Yields:
        Each item produced by ``re_split`` on the file contents, in order,
        that is not contained in the value returned by ``get_stopwords()``.
    """
    excluded = get_stopwords()
    tokens = re_split(read_file(testfilepath))
    # Hand back one non-stop word at a time.
    yield from (token for token in tokens if token not in excluded)
|
|
|
|
|
|
# Count how often each non-stop word occurs, then sort and print the result.
# NOTE(review): testfilepath, sort_dict and print_word_freqs are not defined
# in this file; presumably they come from the cppy.cp_util star import.
freqs = {}
for token in non_stop_words(testfilepath):
    if token in freqs:
        freqs[token] += 1
    else:
        freqs[token] = 1

data = sort_dict(freqs)
print_word_freqs(data)