You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
971 B
34 lines
971 B
9 months ago
|
from collections import Counter
|
||
|
from cppy.cp_util import *
|
||
|
|
||
|
# Cache: a dict mapping each file path to its word-frequency result; it could also be saved to local disk so the computed results can be reused.
|
||
|
# In-memory memo table, filled lazily by calculate_word_frequency (key: file path).
word_freq_cache: dict = {}
|
||
|
|
||
|
def calculate_word_frequency(file_path):
    """Return the ten most common words of a file, memoised per path.

    The module-level ``word_freq_cache`` is consulted first. On a miss the
    words returned by ``extract_file_words`` are counted and the top-10
    list is stored under ``file_path`` before being returned.

    Args:
        file_path: Passed unchanged to ``extract_file_words`` and used as
            the cache key.

    Returns:
        The list produced by ``Counter.most_common(10)``: at most ten
        ``(word, count)`` pairs, most frequent first. The very same list
        object is kept in the cache and handed back on later calls.
    """
    try:
        # Cache hit: reuse the previously computed result.
        return word_freq_cache[file_path]
    except KeyError:
        pass

    # Cache miss: count the words and remember the top ten for next time.
    top_words = Counter(extract_file_words(file_path)).most_common(10)
    word_freq_cache[file_path] = top_words
    return top_words
|
||
|
|
||
|
# Demo run: compute the top-10 word frequencies for the file at testfilepath
# and print them.
top_10_words = calculate_word_frequency(file_path=testfilepath)
print_word_freqs(top_10_words)
|
||
9 months ago
|
|
||
|
'''
|
||
9 months ago
|
Python 提供了一个缓存调用函数的装饰器
|
||
|
import functools
|
||
|
|
||
9 months ago
|
# 使用 functools.lru_cache 缓存结果
|
||
|
@functools.lru_cache(maxsize=None)
|
||
|
def calculate_word_frequency(file_path):
|
||
|
...
|
||
|
return result
|
||
|
'''
|