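# NOTE(review): the lines below are residue from the repository web viewer
# (page chrome and file metadata), not part of the program.
# You cannot select more than 25 topics. Topics must start with a letter or
# number, can include dashes ('-'), and can be up to 35 characters long.
#
# 53 lines
# 1.7 KiB
#
# 9 months ago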
from cppy.cp_util import *
def extract_words(obj: dict, path_to_file: str) -> None:
    """Extract the words from a file and store them in ``obj['data']``.

    The actual extraction is delegated to ``extract_file_words`` (brought in
    by the star import from ``cppy.cp_util``); whatever it returns is stored
    unchanged, replacing any previous value of ``obj['data']``.

    Args:
        obj: Dictionary object that holds the data.
        path_to_file: Path of the file to read.
    """
    obj['data'] = extract_file_words(path_to_file)


def increment_count(obj: dict, w: str) -> None:
    """Increase the count of a word, starting at 1 if it is not present yet.

    Args:
        obj: Dictionary object whose ``'freqs'`` entry maps words to counts.
        w: The word to count.
    """
    freqs = obj['freqs']
    # A single lookup with a default of 0 covers both the "new word" and the
    # "already seen" case.
    freqs[w] = freqs.get(w, 0) + 1


# Data-storage object: a plain dict bundling the word list with the closures
# that initialise it and read it back.
data_storage_obj = {
    # The stored word list (empty until 'init' is called).
    'data': [],
    # Initialiser: extracts the words of the given file into 'data'.
    'init': lambda path_to_file: extract_words(data_storage_obj, path_to_file),
    # Accessor: returns the current word list.
    'words': lambda: data_storage_obj['data'],
}

# Word-frequency object: a plain dict bundling the frequency table with the
# closures that update it and return it sorted.
word_freqs_obj = {
    # Maps each word to the number of times it has been counted.
    'freqs': {},
    # Adds one to the count of the given word.
    'increment_count': lambda w: increment_count(word_freqs_obj, w),
    # Returns the frequencies as ordered by sort_dict (from cppy.cp_util).
    'sorted': lambda: sort_dict(word_freqs_obj['freqs']),
}

if __name__ == '__main__':
    # Initialise the data-storage object: load the words from the test file.
    # `testfilepath` is not defined in this file; presumably it is provided by
    # the star import from cppy.cp_util -- TODO confirm.
    data_storage_obj['init'](testfilepath)

    # Walk the word list and bump the count of every word.
    for word in data_storage_obj['words']():
        word_freqs_obj['increment_count'](word)

    # Fetch the sorted word frequencies and print them.
    word_freqs = word_freqs_obj['sorted']()
    print_word_freqs(word_freqs)