import re
from collections import Counter
from cppy.cp_util import *

# Word-frequency script: read a text file, normalise it, drop stop words,
# count the remaining words and print them from most to least frequent.
# NOTE(review): `testfilepath`, `get_stopwords` and `print_word_freqs` are not
# defined in this file; presumably they come from the `cppy.cp_util` wildcard
# import above -- confirm.

# Read the whole file and lowercase it so that counting is case-insensitive.
with open(testfilepath, encoding='utf-8') as f:
    data = f.read().lower()

# Replace every run of non-word characters and underscores with one space.
# The pattern must be a raw string: '\W' inside a plain literal is an invalid
# string escape and raises a SyntaxWarning on recent Python versions.
data = re.sub(r'[\W_]+', ' ', data)

# Tokenise on whitespace.
words = data.split()

# Remove stop words. Membership is tested against a set built once so each
# lookup is O(1) regardless of the container `get_stopwords()` returns
# (assumes it yields hashable word strings -- TODO confirm).
stop_words = get_stopwords()
stop_word_lookup = frozenset(stop_words)
words = [word for word in words if word not in stop_word_lookup]

# Count occurrences of each remaining word.
word_freqs = Counter(words)

# (word, count) pairs ordered by descending count; words with equal counts
# keep first-seen order, exactly as a stable sort on the count would.
sorted_word_freqs = word_freqs.most_common()

print_word_freqs(sorted_word_freqs)