# coding=gbk
"""Render a word-cloud image from the weighted keywords of a text file.

Pipeline: read the text, segment it with jieba and drop stop words, extract
weighted keywords with ``jieba.analyse.extract_tags``, then draw them with
``wordcloud.WordCloud`` using a background image as the mask.
"""
from __future__ import print_function

import jieba.analyse
import wordCloud.Convert
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator


def clean_using_stopword(text, stopwords_path):
    """Segment *text* with jieba and drop stop words and very short tokens.

    Args:
        text: Raw text to segment.
        stopwords_path: Path to a UTF-8 file with one stop word per line.

    Returns:
        The kept tokens concatenated into a single string with no separator.
    """
    with open(stopwords_path, 'r', encoding='UTF-8') as f_stop:
        # A set gives O(1) membership tests; the file is split on '\n' only,
        # so entries are compared exactly as they appear on each line.
        stopwords = set(f_stop.read().split('\n'))

    kept = []
    # Iterate the tokens directly instead of joining them with "/" and
    # splitting again; a lone "/" token is dropped by the length check anyway.
    for word in jieba.cut(text, cut_all=False):
        stripped = word.strip()
        # Keep tokens longer than one character that are not stop words.
        if len(stripped) > 1 and stripped not in stopwords:
            kept.append(word)

    # NOTE(review): tokens are joined without a separator, so any later
    # segmentation of the result may differ from this one - confirm intended.
    return ''.join(kept)


def preprocessing(text_path, stopwords_path):
    """Read the UTF-8 file at *text_path* and return its stop-word-filtered text.

    Args:
        text_path: Path to the UTF-8 source text.
        stopwords_path: Path to the UTF-8 stop-word list (one word per line).

    Returns:
        The result of ``clean_using_stopword`` applied to the file content.
    """
    with open(text_path, 'r', encoding='UTF-8') as f:
        content = f.read()
    return clean_using_stopword(content, stopwords_path)


def extract_keywords(max_words, text_path, stopwords_path):
    """Extract up to *max_words* weighted keywords from the preprocessed text.

    Each keyword and its weight is printed as ``word---weight``. No
    part-of-speech filter is passed to ``extract_tags``.

    Args:
        max_words: Maximum number of keywords to extract (``topK``).
        text_path: Path to the UTF-8 source text.
        stopwords_path: Path to the UTF-8 stop-word list.

    Returns:
        A dict mapping each keyword to its weight; the weights are later used
        as the frequencies that drive the word cloud.
    """
    tags = jieba.analyse.extract_tags(
        preprocessing(text_path, stopwords_path),
        topK=max_words,
        withWeight=True,
    )
    keywords = dict()
    for word, weight in tags:
        print("%s---%f" % (word, weight))
        keywords[word] = weight
    return keywords


def draw_wordcloud(bg_image_path, font_path, text_path, stopwords_path,
                   background_color, max_words, save_path, filepath, savepath,
                   namedict_path):
    """Generate a word cloud from a text file and write it to *save_path*.

    Args:
        bg_image_path: Image read with ``plt.imread`` and used as the mask.
        font_path: Font file handed to ``WordCloud``.
        text_path: Path to the UTF-8 source text.
        stopwords_path: Path to the UTF-8 stop-word list.
        background_color: Background colour of the rendered cloud.
        max_words: Maximum number of words extracted and displayed.
        save_path: Output image path.
        filepath: Input path passed to ``wordCloud.Convert.convert``.
        savepath: Output path passed to ``wordCloud.Convert.convert``.
        namedict_path: jieba user dictionary loaded before segmentation.
    """
    # Project-local helper; presumably converts the image at `filepath` into
    # the mask image written to `savepath` - verify against wordCloud.Convert.
    wordCloud.Convert.convert(filepath, savepath)
    jieba.load_userdict(namedict_path)

    # Load the background image that serves as the mask.
    back_coloring = plt.imread(bg_image_path)

    # Configure the word cloud.
    wc = WordCloud(
        font_path=font_path,                # font
        background_color=background_color,  # background colour
        max_words=max_words,                # maximum number of words shown
        mask=back_coloring,                 # mask image
    )

    # Build the cloud from the keyword weights and save it.
    wc.generate_from_frequencies(
        extract_keywords(max_words, text_path, stopwords_path))
    wc.to_file(save_path)


if __name__ == '__main__':
    filepath = 'pic/image1.png'
    savepath = 'pic/convert.jpg'
    bg_image_path = "pic/convert.jpg"
    text_path = 'text/jsjs.txt'
    font_path = 'text/msyh.ttf'
    stopwords_path = 'text/stopword.txt'
    background_color = "white"  # background colour
    max_words = 2000  # maximum number of words shown in the cloud
    save_path = "out/wordcloud.jpg"
    namedict_path = "text/namedict.txt"

    # Keyword arguments guard against misordering the many path parameters.
    draw_wordcloud(
        bg_image_path=bg_image_path,
        font_path=font_path,
        text_path=text_path,
        stopwords_path=stopwords_path,
        background_color=background_color,
        max_words=max_words,
        save_path=save_path,
        filepath=filepath,
        savepath=savepath,
        namedict_path=namedict_path,
    )