# wordcloud_create_tool/wordCloud/DrawBackground.py

# coding=gbk
from __future__ import print_function

import jieba.analyse
import wordCloud.Convert
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator


def clean_using_stopword(text, stopwords_path):
    """Segment *text* with jieba and drop stop words and very short tokens.

    Args:
        text: Raw text to segment.
        stopwords_path: Path to a UTF-8 text file holding one stop word
            per line.

    Returns:
        The surviving tokens concatenated into a single string with no
        separator between them.
    """
    with open(stopwords_path, 'r', encoding='UTF-8') as f_stop:
        # A set gives O(1) membership tests, and stripping every entry keeps
        # stray whitespace around a stop word from defeating the comparison.
        stopwords = {line.strip() for line in f_stop}

    kept_words = []
    for word in jieba.cut(text, cut_all=False):
        token = word.strip()
        # Keep only tokens longer than one character that are not stop words.
        if len(token) > 1 and token not in stopwords:
            kept_words.append(word)
    # NOTE(review): tokens are joined without a separator, so downstream
    # consumers that re-segment this text may see different word boundaries;
    # kept as-is because callers depend on the current output.
    return ''.join(kept_words)


def preprocessing(text_path, stopwords_path):
    """Read a UTF-8 text file and return its content with stop words removed.

    Args:
        text_path: Path to the UTF-8 text file to read.
        stopwords_path: Path to the stop-word file, forwarded to
            ``clean_using_stopword``.

    Returns:
        The result of ``clean_using_stopword`` applied to the file content.
    """
    with open(text_path, 'r', encoding='UTF-8') as f:
        content = f.read()
    return clean_using_stopword(content, stopwords_path)


def extract_keywords(max_words, text_path, stopwords_path):
    """Extract weighted keywords from a text file.

    The text is first cleaned by ``preprocessing``; up to ``max_words``
    keywords are then requested from ``jieba.analyse.extract_tags`` together
    with their weights, which the word cloud later uses as frequencies.

    Args:
        max_words: Maximum number of keywords to extract.
        text_path: Path to the UTF-8 text file to analyse.
        stopwords_path: Path to the stop-word file.

    Returns:
        A dict mapping each keyword to its weight.
    """
    cleaned_text = preprocessing(text_path, stopwords_path)
    tags = jieba.analyse.extract_tags(cleaned_text, max_words, withWeight=True)
    keywords = dict()
    for word, weight in tags:
        # Echo every keyword and its weight to stdout as it is collected.
        print("%s---%f" % (word, weight))
        keywords[word] = weight
    return keywords


def draw_wordcloud(bg_image_path, font_path, text_path, stopwords_path, background_color, max_words, save_path, filepath, savepath, namedict_path):
    """Render a word cloud masked by one image and coloured from another.

    Args:
        bg_image_path: Image read with ``plt.imread`` and used as the mask.
        font_path: Font file passed to ``WordCloud``.
        text_path: UTF-8 text file the keywords are extracted from.
        stopwords_path: Stop-word file used while cleaning the text.
        background_color: Background colour of the rendered cloud.
        max_words: Maximum number of words to extract and to display.
        save_path: Output file the finished word cloud is written to.
        filepath: Source image; passed to ``wordCloud.Convert.convert`` and
            also used as the colour source for recolouring.
        savepath: Second argument of ``wordCloud.Convert.convert``
            (presumably where the converted image is written -- confirm
            against that module). Not to be confused with ``save_path``.
        namedict_path: User dictionary loaded into jieba before segmenting.
    """
    wordCloud.Convert.convert(filepath, savepath)
    jieba.load_userdict(namedict_path)

    # The mask image constrains where words may be drawn.
    back_coloring = plt.imread(bg_image_path)
    wc = WordCloud(font_path=font_path,
                   background_color=background_color,
                   max_words=max_words,
                   mask=back_coloring,
                   )

    # Lay out the cloud from keyword weights rather than raw text.
    wc.generate_from_frequencies(extract_keywords(max_words, text_path, stopwords_path))

    # Recolour the words from the source image; the context manager makes
    # sure the image file handle is released once the pixels are copied.
    with Image.open(filepath) as source_image:
        color_source = np.array(source_image)
    image_colors = ImageColorGenerator(color_source)
    wc.recolor(color_func=image_colors)

    # Write the result exactly once.
    wc.to_file(save_path)


if __name__ == '__main__':
    # Demo configuration; all paths are relative to the working directory.
    options = {
        # Source image, and the path handed to the converter as its target.
        'filepath': 'pic/image1.png',
        'savepath': 'pic/convert.jpg',
        # Mask image: the same file as ``savepath`` above.
        'bg_image_path': "pic/convert.jpg",
        # Text inputs.
        'text_path': 'text/jsjs.txt',
        'stopwords_path': 'text/stopword.txt',
        'namedict_path': "text/namedict.txt",
        # Rendering options.
        'font_path': 'text/msyh.ttf',
        'background_color': "white",  # background colour
        'max_words': 2000,  # maximum number of words shown in the cloud
        # Output image.
        'save_path': "out/wordcloud.jpg",
    }

    draw_wordcloud(**options)