parent
83c156a3d5
commit
44b0c00567
@ -1,2 +0,0 @@
|
|||||||
" my Some sure acquaintance or other, my dear, sure,other I suppose; I am sure I do not
|
|
||||||
know. sure "
|
|
@ -1,29 +0,0 @@
|
|||||||
import sys
|
|
||||||
from cppy.cp_util import *
|
|
||||||
|
|
||||||
## Splitting the work into chunks could be factored into one general-purpose function -- ideally a generator.
|
|
||||||
|
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
testfile = os.path.join(script_dir, 'test.txt')
|
|
||||||
stop_words = get_stopwords()
|
|
||||||
|
|
||||||
# If the program crashes, adjust the value 5000 below.
|
|
||||||
RECURSION_LIMIT = 5000
|
|
||||||
sys.setrecursionlimit( RECURSION_LIMIT )
|
|
||||||
|
|
||||||
def count(i, chunks, stopwords, wordfreqs):
    """Recursively tally the words of chunks[i], chunks[i-1], ..., chunks[0].

    Args:
        i: Index of the chunk to process in this call; a negative value
            ends the recursion without touching ``wordfreqs``.
        chunks: Indexable collection of word iterables.
        stopwords: Container of words to skip (tested with ``in``).
        wordfreqs: Dict updated in place, mapping word -> occurrence count.

    Returns:
        None. Results are accumulated in ``wordfreqs``.
    """
    if i >= 0:
        for token in chunks[i]:
            if token in stopwords:
                continue
            wordfreqs[token] = wordfreqs.get(token, 0) + 1
        # One recursive call per chunk, walking towards index 0.
        count(i - 1, chunks, stopwords, wordfreqs)
|
|
||||||
|
|
||||||
# Read the whole input file and split it into words; the context manager
# makes sure the file handle is closed.
with open(testfile, encoding='utf-8') as infile:
    word_list = re_split(infile.read())
filesize = len(word_list)

# count() recurses once per chunk (plus one call for the base case), so the
# number of chunks must stay well below the interpreter's recursion limit.
# Capping it at half the limit leaves headroom for frames already in use.
max_chunks = max(1, RECURSION_LIMIT // 2)
chunk_size = (filesize // max_chunks) + 1

# Consecutive, non-overlapping slices of chunk_size words each, so every
# word is counted exactly once.
chunks = [word_list[start:start + chunk_size]
          for start in range(0, filesize, chunk_size)]

word_freqs = {}
# Start from the last chunk; an empty file yields no chunks and the call
# returns immediately (index -1).
count(len(chunks) - 1, chunks, stop_words, word_freqs)

print_word_freqs(sort_dict(word_freqs))
|
|
Loading…
Reference in new issue