import string
from collections import Counter

from cppy.cp_util import *

################################
# data
################################
data = ''        # raw text of the input file, filled by read_file()
words = []       # lower-cased tokens with stop words removed, filled by extractwords()
word_freqs = []  # (word, count) pairs, filled by frequencies() and ordered by sort()


################################
# procedures
################################
def read_file(path_to_file):
    """Read the whole file at *path_to_file* into the global ``data``.

    The file is decoded as UTF-8.
    """
    global data
    with open(path_to_file, encoding='utf-8') as f:
        data = f.read()


def extractwords():
    """Tokenize the global ``data`` and store the result in the global ``words``.

    The text is lower-cased and split on whitespace; tokens that appear in the
    stop-word file or that are a single ASCII lowercase letter are dropped.
    """
    global words
    # NOTE(review): a plain whitespace split keeps punctuation attached to
    # tokens ("word," != "word"); confirm whether that is intended.
    tokens = data.lower().split()

    # `stopwordfilepath` is not defined in this file; presumably it is provided
    # by the star import from cppy.cp_util -- verify.
    # Decode explicitly as UTF-8, matching read_file(), instead of relying on
    # the platform default encoding.
    with open(stopwordfilepath, encoding='utf-8') as f:
        # Strip each entry so a trailing newline or a space after a comma
        # cannot prevent a stop word from matching.
        stop_words = {entry.strip() for entry in f.read().split(',')}
    stop_words.update(string.ascii_lowercase)

    words = [word for word in tokens if word not in stop_words]


def frequencies():
    """Count the words in the global ``words`` into the global ``word_freqs``.

    ``word_freqs`` is replaced (not extended) by a list of ``(word, count)``
    pairs in first-occurrence order, so calling this function repeatedly does
    not accumulate duplicate entries.
    """
    global word_freqs
    word_freqs = list(Counter(words).items())


def sort():
    """Sort the global ``word_freqs`` in place by count, highest first.

    Expects ``frequencies()`` to have been called first. The sort is stable,
    so words with equal counts keep their first-occurrence order.
    """
    word_freqs.sort(key=lambda pair: pair[1], reverse=True)


if __name__ == "__main__":
    # `testfilepath` is not defined in this file; presumably it is provided by
    # the star import from cppy.cp_util -- verify.
    read_file(testfilepath)
    extractwords()
    frequencies()
    sort()

    # Print the ten most frequent words as "word - count".
    for word, count in word_freqs[:10]:
        print(word, '-', count)