from collections import Counter
from operator import itemgetter
import string

import jieba


def count_word(filename, top_n=20):
    """Count word frequencies in a UTF-8 text file and print the most common.

    The text is lower-cased, every ASCII punctuation character (those in
    ``string.punctuation``) is replaced by a space, and the result is
    segmented with ``jieba.lcut(..., cut_all=False)``. Tokens of length 1
    are ignored.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).
        top_n: How many of the most frequent words to print. Defaults to 20.
            If fewer words are available, all of them are printed.

    Returns:
        A list of ``(word, count)`` tuples sorted by count in descending
        order. Words with equal counts keep the order in which they first
        appear in the text.
    """
    # Use a context manager so the file handle is always closed.
    with open(filename, "r", encoding="utf-8") as src:
        file_txt = src.read()

    # Normalise case so that "Word" and "word" are counted together.
    file_txt = file_txt.lower()

    # Replace every ASCII punctuation character with a space in one pass.
    punctuation_to_space = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )
    file_txt = file_txt.translate(punctuation_to_space)

    # Segment the text into tokens (lcut already returns a list).
    words = jieba.lcut(file_txt, cut_all=False)

    # Tally every token longer than one character in a single pass instead
    # of calling list.count() once per distinct word.
    tally = Counter(word for word in words if len(word) > 1)

    # Sort by frequency, highest first; the sort is stable, so ties keep
    # first-occurrence order.
    counts = sorted(tally.items(), key=itemgetter(1), reverse=True)

    # Print the top_n most frequent words; slicing avoids an IndexError
    # when the text contains fewer than top_n distinct words.
    for entry in counts[:top_n]:
        print(entry)

    return counts