# CodePattern/一盘大棋/A01.py

import string
from cppy.cp_util import *

# Monolithic-style term-frequency script: everything is done inline, with no
# helper functions, and the ten most frequent non-stop words are printed.
#
# NOTE(review): stopwordfilepath and testfilepath are not defined in this
# file; presumably they come from the star import of cppy.cp_util - confirm.

# Prepare the word-frequency table and the stop-word list.
# word_freqs holds [word, count] pairs kept ordered by descending count.
word_freqs = []
with open(stopwordfilepath, encoding='utf-8') as f:
    # Strip surrounding whitespace so that a trailing newline in the file
    # cannot keep the last comma-separated stop word from ever matching.
    stop_words = [w.strip() for w in f.read().split(',')]
# Single lowercase letters are treated as stop words as well.
stop_words.extend(list(string.ascii_lowercase))
# Set copy of the stop words: O(1) membership tests inside the scan loop.
stop_word_set = set(stop_words)

# Use a context manager so the text file is closed deterministically.
with open(testfilepath, encoding='utf-8') as text_file:
    for line in text_file:
        start_char = None
        # Scan one extra non-alphanumeric sentinel character so that a word
        # running up to the very end of the line (e.g. a last line without a
        # trailing newline) is still terminated and counted.
        for i, c in enumerate(line + '\n'):
            if start_char is None:
                if c.isalnum():
                    # A word starts here.
                    start_char = i
                continue
            if c.isalnum():
                # Still inside the current word.
                continue

            # A word just ended: extract it and reset the start marker.
            word = line[start_char:i].lower()
            start_char = None

            # Skip stop words.
            if word in stop_word_set:
                continue

            # Look for an existing entry; pair_index is its position.
            for pair_index, pair in enumerate(word_freqs):
                if word == pair[0]:
                    pair[1] += 1
                    break
            else:
                # First occurrence of this word.
                word_freqs.append([word, 1])
                continue

            # The count of an existing entry grew: move it toward the front
            # past every entry with a strictly smaller count, so the list
            # stays ordered by descending frequency.
            for n in reversed(range(pair_index)):
                if word_freqs[pair_index][1] > word_freqs[n][1]:
                    # Swap the two entries and keep following the moved one.
                    word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
                    pair_index = n

# Print the (up to) ten most frequent words.
for tf in word_freqs[0:10]:
    print(tf[0], '-', tf[1])
01 9 months ago			`import string`
			`from cppy.cp_util import *`

			`# 准备词和停用词表`
			`word_freqs = []`
			`with open( stopwordfilepath,encoding='utf-8' ) as f:`
			`stop_words = f.read().split(',')`
			`stop_words.extend(list(string.ascii_lowercase))`

			`for line in open( testfilepath ,encoding='utf-8' ):`
			`start_char = None`
			`i = 0`
			`for c in line:`
			`if start_char == None:`
			`if c.isalnum():`
			`# 一个单词开始`
			`start_char = i`
			`else:`
			`if not c.isalnum():`
			`# 一个单词结束`
			`found = False`
			`word = line[start_char:i].lower()`
			`# 跳过停用词`
			`if word not in stop_words:`
			`pair_index = 0`
			`# 单词是否第一次出现`
			`for pair in word_freqs:`
			`if word == pair[0]:`
			`pair[1] += 1`
			`found = True`
			`break`
			`pair_index += 1`
			`if not found:`
			`word_freqs.append([word, 1])`
			`elif len(word_freqs) > 1:`
			`for n in reversed(range(pair_index)):`
			`if word_freqs[pair_index][1] > word_freqs[n][1]:`
			`# 交换`
			`word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]`
			`pair_index = n`
			`# 重置开始标记`
			`start_char = None`
			`i += 1`

			`for tf in word_freqs[0:10]:`
			`print(tf[0], '-', tf[1])`