"""Count word frequencies in a text file and print them, most frequent first.

Pipeline: read the file, lowercase it, replace every run of non-letter
characters with a single space, split into words, drop stop words, count the
remaining words, and hand the frequency-sorted result to the printer.

``testfilepath``, ``get_stopwords`` and ``print_word_freqs`` are not defined
in this file; presumably they come from the ``cppy.cp_util`` star import
below -- confirm against that module.
"""
import re
from collections import Counter

from cppy.cp_util import *

# Read the whole file and lowercase it in one step.
with open(testfilepath, encoding='utf-8') as f:
    data = f.read().lower()

# Collapse every run of non-word characters and underscores into one space.
# The pattern is a raw string so that `\W` reaches the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
data = re.sub(r'[\W_]+', ' ', data)

# Tokenize on whitespace.
words = data.split()

# Remove stop words. The stop words are materialized into a set once so each
# membership test below is a hash lookup rather than a scan.
# NOTE(review): assumes get_stopwords() returns an iterable of hashable
# strings -- confirm against cppy.cp_util.
stop_words = set(get_stopwords())
words = [word for word in words if word not in stop_words]

# Count occurrences of each remaining word.
word_freqs = Counter(words)

# (word, count) pairs ordered by descending count; words with equal counts
# keep the order in which they were first encountered.
sorted_word_freqs = word_freqs.most_common()
print_word_freqs(sorted_word_freqs)