zj3D 4 months ago
parent 4606a87618
commit ceb9955051

@@ -3,7 +3,7 @@ from cppy.cp_util import stopwordfilepath,testfilepath
stopwords = set(open( stopwordfilepath,encoding = 'utf8' ).read().split(','))
words = re.findall('[a-z]{2,}', open( testfilepath,encoding = 'utf8').read().lower())
counts = collections.Counter(w for w in words if w not in stopwords)
counts = collections.Counter( w for w in words if w not in stopwords )
for (w, c) in counts.most_common(10) : print(w, '-', c)
'''

@@ -0,0 +1,4 @@
## 任务
本项目的主要功能任务:做文本文件的分词,过滤常见词,求词频,并排序输出。

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save