forked from p46318075/CodePattern
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
451 B
20 lines
451 B
10 months ago
|
from cppy.cp_util import *
|
||
|
|
||
9 months ago
|
#
|
||
10 months ago
|
# Generator
|
||
9 months ago
|
#
|
||
|
def non_stop_words(testfilepath):
    """Lazily yield every word from the file that is not a stop word.

    The stop-word collection comes from ``get_stopwords()``; the file
    content is obtained with ``read_file(testfilepath)`` and broken into
    words by ``re_split``. Words are yielded in the order ``re_split``
    produced them, and duplicates are kept.

    Args:
        testfilepath: Value handed unchanged to ``read_file`` (presumably
            the path of the text file to analyse -- confirm against
            ``cppy.cp_util``).

    Yields:
        One word at a time for which ``word not in stopwords`` holds.
    """
    excluded = get_stopwords()
    tokens = re_split(read_file(testfilepath))
    # Emit one non-stop word per step; nothing runs until the caller iterates.
    yield from (token for token in tokens if token not in excluded)
|
||
|
|
||
10 months ago
|
|
||
9 months ago
|
# Count how many times each non-stop word occurs in the input.
freqs = {}
for word in non_stop_words(testfilepath):
    if word in freqs:
        freqs[word] += 1
    else:
        # First time this word is seen.
        freqs[word] = 1

# Order the counts with the project helper, then print them.
data = sort_dict(freqs)
print_word_freqs(data)
|