master
			
			
		
		
							parent
							
								
									269a5425b0
								
							
						
					
					
						commit
						d183fa0f74
					
				@ -0,0 +1,25 @@
 | 
				
			||||
import re
 | 
				
			||||
from collections import Counter
 | 
				
			||||
from cppy.cp_util import *
 | 
				
			||||
 | 
				
			||||
# Cache: a dict mapping each file path to its word-frequency result. It could
# also be saved to local disk so that computed results are reusable.
word_freq_cache = {}


def calculate_word_frequency(file_path):
    """Return the ten most common words of *file_path* with their counts.

    Results are memoized in the module-level ``word_freq_cache`` dict, keyed
    by ``file_path`` exactly as given, so the file is only processed on the
    first request for that key.

    Args:
        file_path: Key identifying the file; passed unchanged to
            ``extract_file_words`` (presumably supplied by the
            ``cppy.cp_util`` star import -- confirm).

    Returns:
        The list produced by ``Counter.most_common(10)``: up to ten
        ``(word, count)`` pairs ordered from most to least frequent. The
        same list object is stored in the cache and returned on later calls.
    """
    if file_path not in word_freq_cache:
        # Cache miss: count the words once and keep only the top ten.
        word_freq_cache[file_path] = Counter(
            extract_file_words(file_path)
        ).most_common(10)

    return word_freq_cache[file_path]
 | 
				
			||||
 | 
				
			||||
# Test the function: compute the top-ten word list for `testfilepath` and
# print each entry as "word: count", one per line.
# NOTE(review): `testfilepath` is not defined in this file; presumably it comes
# from the `cppy.cp_util` star import -- confirm.
top_10_words = calculate_word_frequency(testfilepath)
for word, freq in top_10_words:
    print(f"{word}: {freq}")
 | 
				
			||||
					Loading…
					
					
				
		Reference in new issue