lin/python豆瓣电影数据可视化分析/项目/word_cloud.py

import jieba
from PIL import Image
import numpy as np
from wordcloud import WordCloud
import random
from utils.query import querys
from utils.utils import typeList
import matplotlib
import re

matplotlib.use('Agg')  # 使用非交互式后端
import matplotlib.pyplot as plt


# Comments
def getImageByComments(comments, exclude_words=None):
    """Render a word cloud from comment texts and save it as a PNG.

    Args:
        comments: Iterable of mappings; the ``'content'`` value of each item
            is concatenated into the text that gets segmented. Items whose
            content is ``None`` are skipped.
        exclude_words: Optional iterable of literal substrings that are
            removed from the segmented text before rendering. Defaults to
            no exclusions.

    Returns:
        The relative path of the saved image,
        ``./static/img/<random integer>.png``.
    """
    text = ''.join(
        item['content'] for item in comments if item['content'] is not None
    )

    # Segment the text with jieba and re-join the tokens with spaces.
    string = ' '.join(jieba.cut(text))

    # Drop the excluded keywords. They are matched literally, so characters
    # such as "." or "(" in a keyword are not interpreted as regex syntax.
    for word in exclude_words or ():
        string = string.replace(word, '')

    # Discard single-character tokens.
    filtered_string = ' '.join(word for word in string.split() if len(word) > 1)

    # No shape mask is applied, so the mask image is not loaded.
    wc = WordCloud(
        background_color='white',
        # Forward slash: "\飞" is an invalid escape sequence, and a backslash
        # separator only resolves on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    # Draw and save the picture; always release the figure, even on failure.
    fig = plt.figure(1)
    try:
        plt.imshow(wc)
        plt.axis('off')

        randomInt = random.randint(1, 100000000)
        image_path = f'./static/img/{randomInt}.png'
        plt.savefig(image_path)
    finally:
        plt.close(fig)
    return image_path


# Word cloud built from one text column of the movie table
def getImageByAuthor(field, targetImage, resImage, exclude_words=None):
    """Render a word cloud from one column of ``movie`` and save it.

    Args:
        field: Column expression interpolated into
            ``select <field> from movie``. It is inserted into the SQL text
            verbatim, so it must come from trusted code, never from user
            input.
        targetImage: Path of a mask image. No mask is applied at present, so
            the file is not read; the parameter is kept so existing callers
            keep working.
        resImage: Path the rendered figure is saved to.
        exclude_words: Optional iterable of literal substrings that are
            removed from the segmented text before rendering.

    Returns:
        None. The image is written to ``resImage``.
    """
    sql = 'select {} from movie'.format(field)
    data = querys(sql, [], 'select')
    # Only the first column of each row is used; NULL values are skipped.
    text = ''.join(row[0] for row in data if row[0] is not None)

    # Segment the text with jieba and re-join the tokens with spaces.
    string = ' '.join(jieba.cut(text))

    # Drop the excluded keywords. They are matched literally, so characters
    # such as "." or "(" in a keyword are not interpreted as regex syntax.
    for word in exclude_words or ():
        string = string.replace(word, '')

    # Discard single-character tokens.
    filtered_string = ' '.join(word for word in string.split() if len(word) > 1)

    wc = WordCloud(
        background_color='white',
        # Forward slash: "\飞" is an invalid escape sequence, and a backslash
        # separator only resolves on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    # Draw and save the picture; always release the figure, even on failure.
    fig = plt.figure(1)
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.savefig(resImage)
    finally:
        plt.close(fig)


# Build the title word cloud when this module is loaded; the exclusion
# list is empty, so no keywords are filtered out.
exclude_words = []
getImageByAuthor(
    field='title',
    targetImage='./static/img/2.png',
    resImage='./static/img/title_cloud.png',
    exclude_words=exclude_words,
)


def getCastsDataTop():
    """Tally the entries of ``typeList('casts')`` and return the top 100.

    Returns:
        A ``(names, counts)`` pair of parallel lists ordered by descending
        count. Entries with equal counts keep their first-seen order, and at
        most 100 entries are returned.
    """
    tally = {}
    for name in typeList('casts'):
        tally[name] = tally.get(name, 0) + 1

    # Stable sort by occurrence count, highest first, then keep the top 100.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    top = ranked[:100]

    names = [name for name, _ in top]
    counts = [count for _, count in top]
    return names, counts


# 演员
# def getImageByCasts(targetImage, resImage):
#     # 假设getCastsDataTop20()返回演员名单和频率列表
#     castsList, castsFrequency = getCastsDataTop()
#     # print(castsList, castsFrequency)
#     # 将演员名单和频率结合起来，生成一个用于生成词云的文本字符串
#     text = ' '.join([name + ' ' * freq for name, freq in zip(castsList, castsFrequency)])
#
#     # 打开目标图片并获取其形状
#     img = Image.open(targetImage)
#     img_arr = np.array(img)
#
#     # 创建词云对象
#     wc = WordCloud(
#         background_color='white',
#         # mask=img_arr,
#         font_path='STHUPO.TTF'
#     )
#
#     # 生成词云
#     wc.generate_from_text(text)
#
#     # 绘制词云图
#     flg = plt.figure(1)
#     plt.imshow(wc)
#     plt.axis('off')
#
#     # 保存词云图
#     randomInt = random.randint(1, 100000000)
#     plt.savefig(resImage)
#
#     # 关闭绘图
#     plt.close()

# Cast members
def getImageByCasts(targetImage, resImage):
    """Render a word cloud of the most frequent cast names and save it.

    Names and their occurrence counts come from ``getCastsDataTop()`` and
    are used directly as word frequencies (no text segmentation).

    Args:
        targetImage: Path of a mask image. No mask is applied at present, so
            the file is not read; the parameter is kept so existing callers
            keep working.
        resImage: Path the rendered figure is saved to.

    Returns:
        None. The image is written to ``resImage``.
    """
    castsList, castsFrequency = getCastsDataTop()

    # Map each cast name to its occurrence count.
    frequency_dict = dict(zip(castsList, castsFrequency))

    wc = WordCloud(
        background_color='white',
        # Forward slash: "\飞" is an invalid escape sequence, and a backslash
        # separator only resolves on Windows.
        font_path='./飞波正点体.otf',
        max_font_size=100,  # upper bound for the font size
        font_step=1,  # step between candidate font sizes
        random_state=30,  # fixed seed for the cloud's random choices
        max_words=200,  # upper bound on the number of words drawn
    )
    wc.generate_from_frequencies(frequency_dict)

    # Draw and save the picture; always release the figure, even on failure.
    fig = plt.figure(1)
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')  # hide the axes
        # bbox_inches=0 is falsy and leaves the default margins in place;
        # 'tight' with zero padding is what actually trims the border.
        plt.savefig(resImage, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)


# Generate the cast and summary word-cloud images when this module is loaded.
getImageByCasts(
    targetImage='./static/img/2.png',
    resImage='./static/img/cloud_cloud.png',
)

# getImageByAuthor('title', './static/img/1.jpg', './static/img/title_cloud.png')
getImageByAuthor(
    field='summary',
    targetImage='./static/img/2.png',
    resImage='./static/img/summary_cloud.png',
)

print('生成词云图成功！')

# def getImageByComments(comments):
#     text = ''
#     for i in comments:
#         text = text + i['content']
#
#     # 分词
#     cut = jieba.cut(text)
#     string = ' '.join(cut)
#
#     img = Image.open('./static/img/2.png')
#     img_arr = np.array(img)
#     wc = WordCloud(
#         background_color='white',
#         mask=img_arr,
#         font_path='STHUPO.TTF'
#     )
#     wc.generate_from_text(string)
#
#     # 绘制图片
#     flg = plt.figure(1)
#     plt.imshow(wc)
#     plt.axis('off')
#
#     randomInt = random.randint(1, 100000000)
#     plt.savefig(f'./static/img/{randomInt}.png')
#     return f'./static/img/{randomInt}.png'
#
#
# def getImageByAuthor(field, targetImage, resImage):
#     sql = 'select {} from movie'.format(field)
#     data = querys(sql, [], 'select')
#     text = ''
#     for i in data:
#         text = text + i[0]
#
#     # 分词
#     cut = jieba.cut(text)
#     string = ' '.join(cut)
#
#     img = Image.open(targetImage)
#     img_arr = np.array(img)
#     wc = WordCloud(
#         background_color='white',
#         mask=img_arr,
#         font_path='STHUPO.TTF'
#     )
#     wc.generate_from_text(string)
#
#     # 绘制图片
#     flg = plt.figure(1)
#     plt.imshow(wc)
#     plt.axis('off')
#
#     randomInt = random.randint(1, 100000000)
#     plt.savefig(resImage)
#
#
# getImageByAuthor('title', './static/img/2.png', './static/img/title_cloud.png')
# getImageByAuthor('summary', './static/img/2.png', './static/img/summary_cloud.png')
#
#
# def getImageByCasts(targetImage, resImage):
#     castsList = typeList('casts')
#     text = ''
#     for i in castsList:
#         text = text + i
#
#     # 分词
#     cut = jieba.cut(text)
#     string = ' '.join(cut)
#
#     img = Image.open(targetImage)
#     img_arr = np.array(img)
#     wc = WordCloud(
#         background_color='white',
#         mask=img_arr,
#         font_path='STHUPO.TTF'
#     )
#     wc.generate_from_text(string)
#
#     # 绘制图片
#     flg = plt.figure(1)
#     plt.imshow(wc)
#     plt.axis('off')
#
#     randomInt = random.randint(1, 100000000)
#     plt.savefig(resImage)
#
#
# getImageByCasts('./static/img/2.png', './static/img/cloud_cloud.png')
0529 6 months ago			`import jieba`
			`from PIL import Image`
			`import numpy as np`
			`from wordcloud import WordCloud`
			`import random`
			`from utils.query import querys`
			`from utils.utils import typeList`
			`import matplotlib`
			`import re`

			`matplotlib.use('Agg') # 使用非交互式后端`
			`import matplotlib.pyplot as plt`


			`# 评论`
			`def getImageByComments(comments):`
			`# 调用函数，并传入要排除的关键字列表`
			`exclude_words_pl = []`
			`text = ''`
			`for i in comments:`
			`text = text + i['content']`

			`# 分词`
			`cut = jieba.cut(text)`
			`string = ' '.join(cut)`

			`# 排除指定关键字`
			`if exclude_words_pl:`
			`for word in exclude_words_pl:`
			`string = re.sub(word, '', string)`

			`# 过滤掉长度为1的单词`
			`filtered_string = ' '.join(word for word in string.split() if len(word) > 1)`

			`img = Image.open('./static/img/2.png')`
			`img_arr = np.array(img)`
			`wc = WordCloud(`
			`background_color='white',`
			`# mask=img_arr,`
			`# font_path=r'C:\Windows\Fonts\simsun.ttc',`
			`font_path='.\飞波正点体.otf',`
			`)`
			`wc.generate_from_text(filtered_string)`

			`# 绘制图片`
			`flg = plt.figure(1)`
			`plt.imshow(wc)`
			`plt.axis('off')`

			`randomInt = random.randint(1, 100000000)`
			`plt.savefig(f'./static/img/{randomInt}.png')`
			`# 关闭图形`
			`plt.close()`
			`return f'./static/img/{randomInt}.png'`


			`# 标题`
			`def getImageByAuthor(field, targetImage, resImage, exclude_words=None):`
			`sql = 'select {} from movie'.format(field)`
			`data = querys(sql, [], 'select')`
			`text = ''`
			`for i in data:`
			`if i[0] is not None:`
			`text = text + i[0]`

			`# 分词`
			`cut = jieba.cut(text)`
			`string = ' '.join(cut)`

			`# 排除指定关键字`
			`if exclude_words:`
			`for word in exclude_words:`
			`string = re.sub(word, '', string)`

			`# 过滤掉长度为1的单词`
			`filtered_string = ' '.join(word for word in string.split() if len(word) > 1)`

			`img = Image.open(targetImage)`
			`img_arr = np.array(img)`
			`wc = WordCloud(`
			`background_color='white',`
			`# mask=img_arr,`
			`font_path='.\飞波正点体.otf',`
			`)`
			`wc.generate_from_text(filtered_string)`

			`# 绘制图片`
			`plt.figure(1)`
			`plt.imshow(wc, interpolation='bilinear')`
			`plt.axis('off')`

			`randomInt = random.randint(1, 100000000)`
			`plt.savefig(resImage)`
			`# 关闭图形`
			`plt.close()`


			`# 调用函数，并传入要排除的关键字列表`
			`exclude_words = []`
			`getImageByAuthor('title', './static/img/2.png', './static/img/title_cloud.png', exclude_words)`


			`def getCastsDataTop():`
			`castsList = typeList('casts')`
			`castsObj = {}`
			`for i in castsList:`
			`if castsObj.get(i, -1) == -1:`
			`castsObj[i] = 1`
			`else:`
			`castsObj[i] = castsObj[i] + 1`
			`castsObj = sorted(castsObj.items(), key=lambda x: x[1], reverse=True)[:100]`
			`row = []`
			`columns = []`
			`for i in castsObj:`
			`row.append(i[0])`
			`columns.append(i[1])`
			`# print(row,columns)`
			`return row, columns`


			`# 演员`
			`# def getImageByCasts(targetImage, resImage):`
			`# # 假设getCastsDataTop20()返回演员名单和频率列表`
			`# castsList, castsFrequency = getCastsDataTop()`
			`# # print(castsList, castsFrequency)`
			`# # 将演员名单和频率结合起来，生成一个用于生成词云的文本字符串`
			`# text = ' '.join([name + ' ' * freq for name, freq in zip(castsList, castsFrequency)])`
			`#`
			`# # 打开目标图片并获取其形状`
			`# img = Image.open(targetImage)`
			`# img_arr = np.array(img)`
			`#`
			`# # 创建词云对象`
			`# wc = WordCloud(`
			`# background_color='white',`
			`# # mask=img_arr,`
			`# font_path='STHUPO.TTF'`
			`# )`
			`#`
			`# # 生成词云`
			`# wc.generate_from_text(text)`
			`#`
			`# # 绘制词云图`
			`# flg = plt.figure(1)`
			`# plt.imshow(wc)`
			`# plt.axis('off')`
			`#`
			`# # 保存词云图`
			`# randomInt = random.randint(1, 100000000)`
			`# plt.savefig(resImage)`
			`#`
			`# # 关闭绘图`
			`# plt.close()`

			`# 演员`
			`def getImageByCasts(targetImage, resImage):`
			`# 假设getCastsDataTop20()返回演员名单和频率列表`
			`castsList, castsFrequency = getCastsDataTop()`

			`# 将演员名单和频率结合起来，生成一个字典，用于生成词云`
			`frequency_dict = {name: freq for name, freq in zip(castsList, castsFrequency)}`

			`# 打开目标图片并获取其形状`
			`img = Image.open(targetImage)`
			`img_arr = np.array(img)`

			`# 创建词云对象`
			`wc = WordCloud(`
			`background_color='white',`
			`# mask=img_arr, # 使用目标图片的形状作为词云的形状`
			`font_path='.\飞波正点体.otf',`
			`max_font_size=100, # 设置最大字体大小`
			`font_step=1, # 设置字体大小变化的步长`
			`random_state=30, # 为字体大小和颜色的随机性设置一个种子`
			`max_words=200 # 设置词云显示的最大单词数`
			`)`

			`# 生成词云`
			`wc.generate_from_frequencies(frequency_dict) # 使用频率字典生成词云`

			`# 绘制词云图`
			`# plt.figure(figsize=(10, 8)) # 设置图形的大小`
			`plt.imshow(wc, interpolation='bilinear')`
			`plt.axis('off') # 不显示坐标轴`

			`# 保存词云图`
			`randomInt = random.randint(1, 100000000)`
			`plt.savefig(resImage, bbox_inches=0) # 使用bbox_inches=0来确保没有额外的空白边界`

			`# 关闭绘图`
			`plt.close()`


			`# 调用函数生成词云图片`
			`getImageByCasts('./static/img/2.png', './static/img/cloud_cloud.png')`

			`# getImageByAuthor('title', './static/img/1.jpg', './static/img/title_cloud.png')`
			`getImageByAuthor('summary', './static/img/2.png', './static/img/summary_cloud.png')`

			`print('生成词云图成功！')`

			`# def getImageByComments(comments):`
			`# text = ''`
			`# for i in comments:`
			`# text = text + i['content']`
			`#`
			`# # 分词`
			`# cut = jieba.cut(text)`
			`# string = ' '.join(cut)`
			`#`
			`# img = Image.open('./static/img/2.png')`
			`# img_arr = np.array(img)`
			`# wc = WordCloud(`
			`# background_color='white',`
			`# mask=img_arr,`
			`# font_path='STHUPO.TTF'`
			`# )`
			`# wc.generate_from_text(string)`
			`#`
			`# # 绘制图片`
			`# flg = plt.figure(1)`
			`# plt.imshow(wc)`
			`# plt.axis('off')`
			`#`
			`# randomInt = random.randint(1, 100000000)`
			`# plt.savefig(f'./static/img/{randomInt}.png')`
			`# return f'./static/img/{randomInt}.png'`
			`#`
			`#`
			`# def getImageByAuthor(field, targetImage, resImage):`
			`# sql = 'select {} from movie'.format(field)`
			`# data = querys(sql, [], 'select')`
			`# text = ''`
			`# for i in data:`
			`# text = text + i[0]`
			`#`
			`# # 分词`
			`# cut = jieba.cut(text)`
			`# string = ' '.join(cut)`
			`#`
			`# img = Image.open(targetImage)`
			`# img_arr = np.array(img)`
			`# wc = WordCloud(`
			`# background_color='white',`
			`# mask=img_arr,`
			`# font_path='STHUPO.TTF'`
			`# )`
			`# wc.generate_from_text(string)`
			`#`
			`# # 绘制图片`
			`# flg = plt.figure(1)`
			`# plt.imshow(wc)`
			`# plt.axis('off')`
			`#`
			`# randomInt = random.randint(1, 100000000)`
			`# plt.savefig(resImage)`
			`#`
			`#`
			`# getImageByAuthor('title', './static/img/2.png', './static/img/title_cloud.png')`
			`# getImageByAuthor('summary', './static/img/2.png', './static/img/summary_cloud.png')`
			`#`
			`#`
			`# def getImageByCasts(targetImage, resImage):`
			`# castsList = typeList('casts')`
			`# text = ''`
			`# for i in castsList:`
			`# text = text + i`
			`#`
			`# # 分词`
			`# cut = jieba.cut(text)`
			`# string = ' '.join(cut)`
			`#`
			`# img = Image.open(targetImage)`
			`# img_arr = np.array(img)`
			`# wc = WordCloud(`
			`# background_color='white',`
			`# mask=img_arr,`
			`# font_path='STHUPO.TTF'`
			`# )`
			`# wc.generate_from_text(string)`
			`#`
			`# # 绘制图片`
			`# flg = plt.figure(1)`
			`# plt.imshow(wc)`
			`# plt.axis('off')`
			`#`
			`# randomInt = random.randint(1, 100000000)`
			`# plt.savefig(resImage)`
			`#`
			`#`
			`# getImageByCasts('./static/img/2.png', './static/img/cloud_cloud.png')`