|
|
# coding=gbk
|
|
|
from __future__ import print_function
|
|
|
|
|
|
import jieba.analyse
|
|
|
import wordCloud.Convert
|
|
|
from PIL import Image
|
|
|
import numpy as np
|
|
|
from matplotlib import pyplot as plt
|
|
|
from wordcloud import WordCloud, ImageColorGenerator
|
|
|
|
|
|
|
|
|
def clean_using_stopword(text, stopwords_path):
    """Segment *text* with jieba and drop stop words and short tokens.

    Args:
        text: Raw text to segment.
        stopwords_path: Path to a UTF-8 encoded file holding one stop word
            per line.

    Returns:
        The surviving tokens concatenated into a single string with no
        separator between them.
    """
    with open(stopwords_path, 'r', encoding='UTF-8') as f_stop:
        # Build the lookup once as a set: membership tests inside the token
        # loop are O(1) instead of scanning a list for every token.
        # Each entry is stripped because tokens are stripped before the
        # comparison; an entry with stray trailing whitespace could
        # otherwise never match.
        stopwords = {line.strip() for line in f_stop.read().split('\n')}

    kept_words = []
    # Precise mode (cut_all=False); iterate the tokens directly rather than
    # joining them with "/" and splitting the string again.
    for word in jieba.cut(text, cut_all=False):
        stripped = word.strip()
        # Keep only tokens longer than one character that are not stop
        # words; this also discards whitespace and single punctuation marks.
        if len(stripped) > 1 and stripped not in stopwords:
            kept_words.append(word)
    return ''.join(kept_words)
|
|
|
|
|
|
|
|
|
def preprocessing(text_path, stopwords_path):
    """Read a UTF-8 text file and return its content with stop words removed.

    Args:
        text_path: Path to the UTF-8 encoded source text.
        stopwords_path: Path to the stop-word file passed on to
            ``clean_using_stopword``.

    Returns:
        The string produced by ``clean_using_stopword`` for the file content.
    """
    with open(text_path, 'r', encoding='UTF-8') as f:
        content = f.read()
    # The file is closed before segmentation starts. The unreachable
    # ``return content`` that used to follow the real return was dropped.
    return clean_using_stopword(content, stopwords_path)
|
|
|
|
|
|
|
|
|
def extract_keywords(max_words, text_path, stopwords_path):
    """Extract up to *max_words* weighted keywords from a text file.

    The text is first cleaned by ``preprocessing``; the keywords and their
    weights come from ``jieba.analyse.extract_tags``. Each pair is also
    printed to stdout as ``word---weight``.

    Args:
        max_words: Maximum number of keywords requested from jieba.
        text_path: Path to the UTF-8 encoded source text.
        stopwords_path: Path to the stop-word file.

    Returns:
        A dict mapping each keyword to its weight, used later to size the
        words in the cloud.
    """
    cleaned_text = preprocessing(text_path, stopwords_path)
    # NOTE: an unused ``allow_pos = ('nr',)`` local was removed here; it was
    # never passed to extract_tags, so no part-of-speech filter is applied.
    tags = jieba.analyse.extract_tags(cleaned_text, max_words, withWeight=True)

    keywords = {}
    for word, weight in tags:
        print("%s---%f" % (word, weight))
        keywords[word] = weight
    return keywords
|
|
|
|
|
|
|
|
|
def draw_wordcloud(bg_image_path, font_path, text_path, stopwords_path,
                   background_color, max_words, save_path, filepath,
                   savepath, namedict_path):
    """Render a keyword word cloud shaped by an image and save it to disk.

    Args:
        bg_image_path: Image read with ``plt.imread`` and used as the cloud
            mask.
        font_path: Font file handed to ``WordCloud``.
        text_path: UTF-8 text file the keywords are extracted from.
        stopwords_path: Stop-word file used while cleaning the text.
        background_color: Background colour of the rendered cloud.
        max_words: Maximum number of words shown (also the number of
            keywords extracted).
        save_path: Destination file for the rendered cloud.
        filepath: Source image; passed to ``wordCloud.Convert.convert`` and
            also used as the colour source for recolouring.
        savepath: Second argument of ``wordCloud.Convert.convert``
            (presumably the converted output image; confirm against that
            module).
        namedict_path: jieba user dictionary loaded before segmentation.
    """
    wordCloud.Convert.convert(filepath, savepath)
    jieba.load_userdict(namedict_path)

    # Mask image that constrains where words may be drawn.
    back_coloring = plt.imread(bg_image_path)

    # Word-cloud settings.
    wc = WordCloud(
        font_path=font_path,                # font
        background_color=background_color,  # background colour
        max_words=max_words,                # maximum number of words shown
        mask=back_coloring,                 # mask / background image
    )

    # Build the cloud from the keyword -> weight mapping.
    wc.generate_from_frequencies(
        extract_keywords(max_words, text_path, stopwords_path))

    # Recolour the words from the source image. np.array() reads the pixel
    # data inside the ``with`` block, so the file handle can be released
    # right away instead of leaking.
    with Image.open(filepath) as source_image:
        color_source = np.array(source_image)
    image_colors = ImageColorGenerator(color_source)
    wc.recolor(color_func=image_colors)

    # Write the result once; the duplicated second to_file() call only
    # rewrote the same file.
    wc.to_file(save_path)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Cloud appearance.
    max_words = 2000            # maximum number of words shown in the cloud
    background_color = "white"  # background colour

    # Text resources.
    text_path = 'text/jsjs.txt'
    stopwords_path = 'text/stopword.txt'
    namedict_path = "text/namedict.txt"
    font_path = 'text/msyh.ttf'

    # Images: the source picture, its converted copy (also used as the
    # mask), and the rendered output.
    filepath = 'pic/image1.png'
    savepath = 'pic/convert.jpg'
    bg_image_path = "pic/convert.jpg"
    save_path = "out/wordcloud.jpg"

    draw_wordcloud(
        bg_image_path=bg_image_path,
        font_path=font_path,
        text_path=text_path,
        stopwords_path=stopwords_path,
        background_color=background_color,
        max_words=max_words,
        save_path=save_path,
        filepath=filepath,
        savepath=savepath,
        namedict_path=namedict_path,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|