forked from p46318075/CodePattern
master
parent
269a5425b0
commit
d183fa0f74
@ -0,0 +1,25 @@
|
||||
import re
|
||||
from collections import Counter
|
||||
from cppy.cp_util import *
|
||||
|
||||
# In-memory cache mapping a file path to its computed top-10 word list.
# It could also be persisted to local disk so results survive across runs.
word_freq_cache = {}


def calculate_word_frequency(file_path):
    """Return the 10 most common words of the file at *file_path*.

    Results are memoized in the module-level ``word_freq_cache`` keyed by
    ``file_path``, so a given path is only processed on its first request.
    The cache is keyed by the path value alone; later changes to the file
    contents are not detected.

    Args:
        file_path: Path of the file to analyse. It is passed unchanged to
            ``extract_file_words`` and used as the cache key.

    Returns:
        A new list of ``(word, count)`` tuples as produced by
        ``Counter.most_common(10)``: at most 10 entries, most frequent
        first.
    """
    cached = word_freq_cache.get(file_path)
    if cached is None:
        # Cache miss: count every word, but keep only the top 10 in the cache.
        word_counts = Counter(extract_file_words(file_path))
        cached = word_counts.most_common(10)
        word_freq_cache[file_path] = cached

    # Return a copy so a caller mutating the result cannot corrupt the
    # cached entry that later callers will receive.
    return list(cached)
|
||||
|
||||
# Demo run: compute the top-10 word frequencies for the sample file and print
# one "word: count" line per entry.
# NOTE(review): `testfilepath` is not defined in this file; presumably it is
# provided by the star import from cppy.cp_util -- confirm.
top_10_words = calculate_word_frequency(testfilepath)
for word, freq in top_10_words:
    print(f"{word}: {freq}")
|
Loading…
Reference in new issue