import jieba

# Read the source text file
txt = open(r'C:\Users\86158\爬虫作业\ai吧.txt', 'r', encoding='UTF-8').read()

# Strip punctuation and other noise characters, then remove remaining whitespace
rp_str = ': , ; 。 、 ? ———— ‘’ “” () ! # 《》\n\ufeff'
for i in rp_str:
    txt = txt.replace(i, '')
txt = ''.join(txt.split())

# Load the custom user dictionary and segment the text
jieba.load_userdict(r'C:\Users\86158\爬虫作业\userdict.txt')
words = jieba.lcut(txt)

# Load the stopword list; split on whitespace/newlines so each entry is a word,
# not a single character
stopwords = open(r'C:\Users\86158\爬虫作业\stopwords.txt', 'r', encoding='UTF-8').read()
stopwords_list = stopwords.split()

# Meaningless high-frequency words to filter out via remove_words
remove_words = ['哈哈', '可以', '紫薯', '整齐', '开始', '以为', '这人', '我们', '好像']

# Count word frequencies, skipping single characters, removed words and stopwords
words_counts = {}
for i in words:
    if len(i) == 1:
        continue
    if i in remove_words:
        continue
    if i not in stopwords_list:
        words_counts[i] = words_counts.get(i, 0) + 1

# Sort by frequency in descending order and print the top 8 keywords
words_list = list(words_counts.items())
words_list.sort(key=lambda x: x[1], reverse=True)
ranking8_list = words_list[:8]
ranking8_dict = dict(ranking8_list)
print(ranking8_dict)

# Save the resulting keywords and their counts to a text file
f = open(r'C:\Users\86158\爬虫作业\ciyuntu.txt', 'w', encoding='UTF-8')
for k, v in words_list:
    f.write('{:<8}{:>2}\n'.format(k, v))
f.close()