import re
from collections import Counter

from cppy.cp_util import *

# Cache: maps a file path to the word-frequency result already computed for it.
# It could also be persisted to local disk so results are reusable across runs.
word_freq_cache = {}


def calculate_word_frequency(file_path):
    """Return the 10 most common words of the file at *file_path*.

    The result is memoized in the module-level ``word_freq_cache`` dict,
    keyed by the exact ``file_path`` value passed in, so the file is only
    processed on the first call for a given path.

    Args:
        file_path: Path handed to ``extract_file_words`` (presumably
            provided by the ``cppy.cp_util`` star import -- confirm).

    Returns:
        A new list of ``(word, count)`` tuples as produced by
        ``Counter.most_common(10)``, most frequent first.
    """
    if file_path not in word_freq_cache:
        # Cache miss: count the words and keep only the top 10.
        words = extract_file_words(file_path)
        word_freq_cache[file_path] = Counter(words).most_common(10)

    # Hand back a copy rather than the cached list itself; otherwise a caller
    # that mutates the result would silently corrupt the cache entry seen by
    # every later call. The (word, count) tuples are immutable, so a shallow
    # copy is sufficient.
    return list(word_freq_cache[file_path])


# Exercise the function; `testfilepath` is presumably supplied by the
# cppy.cp_util star import -- confirm.
top_10_words = calculate_word_frequency(testfilepath)
for word, freq in top_10_words:
    print(f"{word}: {freq}")