import os
import sys

from cppy.cp_util import *

# TODO: splitting the work into chunks could be unified into one
# general-purpose helper, written as a generator.

script_dir = os.path.dirname(os.path.abspath(__file__))
testfile = os.path.join(script_dir, 'test.txt')
stop_words = get_stopwords()

# If the script crashes (e.g. with a RecursionError), adjust this value.
# It is used both as the interpreter recursion limit and as the number of
# words placed in each chunk.
RECURSION_LIMIT = 5000
sys.setrecursionlimit(RECURSION_LIMIT)


def count(i, chunks, stopwords, wordfreqs):
    """Recursively tally the words of ``chunks[i]``, ``chunks[i-1]``, ... ``chunks[0]``.

    Args:
        i: Index of the chunk to process on this call; a negative value
            ends the recursion.
        chunks: Indexable collection of word sequences.
        stopwords: Container of words to ignore (tested with ``in``).
        wordfreqs: Dict mapping word -> count; updated in place.

    Returns:
        None. Results are accumulated in ``wordfreqs``.

    Note:
        One stack frame is used per chunk, so the number of chunks has to
        stay below the interpreter recursion limit.
    """
    if i < 0:
        return
    for word in chunks[i]:
        if word not in stopwords:
            wordfreqs[word] = wordfreqs.get(word, 0) + 1
    count(i - 1, chunks, stopwords, wordfreqs)


# Use a context manager so the file handle is closed once it has been read.
with open(testfile, encoding='utf-8') as text_file:
    word_list = re_split(text_file.read())
filesize = len(word_list)

# Number of chunks needed when every chunk holds at most RECURSION_LIMIT
# words. (May produce one trailing empty chunk, which is harmless.)
chunk_count = (filesize // RECURSION_LIMIT) + 1
# Historical name kept for compatibility; despite the name it has always
# held the number of chunks, not the length of a chunk.
chunk_size = chunk_count

# Both slice bounds must step by RECURSION_LIMIT. The previous lower bound
# stepped by the chunk count instead, which made the chunks overlap and
# counted words more than once for inputs longer than RECURSION_LIMIT.
chunks = [
    word_list[x * RECURSION_LIMIT:(x + 1) * RECURSION_LIMIT]
    for x in range(chunk_count)
]
word_freqs = {}
count(chunk_count - 1, chunks, stop_words, word_freqs)

print_word_freqs(sort_dict(word_freqs))