You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import re
|
|
|
|
|
import collections
|
|
|
|
|
from cppy.cp_util import stopwordfilepath, testfilepath
|
|
|
|
|
|
|
|
|
|
# Load the stop-word list (a single comma-separated string). The context
# manager closes the file as soon as it has been read instead of leaving
# the handle open until the interpreter happens to collect it.
with open(stopwordfilepath, encoding='utf8') as stop_file:
    stopwords = set(stop_file.read().split(','))

# Lower-case the whole text, then keep every run of two or more ASCII
# letters; anything else (digits, punctuation, single letters) is dropped.
with open(testfilepath, encoding='utf8') as text_file:
    words = re.findall(r'[a-z]{2,}', text_file.read().lower())

# Tally every word that is not a stop word.
counts = collections.Counter(w for w in words if w not in stopwords)

# Print the ten most frequent words, one per line, as "word - count".
for (w, c) in counts.most_common(10):
    print(w, '-', c)
|
|
|
|
|
'''
A skilled software engineer would finish the task this concisely.
In the later examples we have to be somewhat more verbose and cannot use
this overly "hacker"-style way of writing.
'''
|