You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

292 lines
7.7 KiB

6 months ago
import jieba
from PIL import Image
import numpy as np
from wordcloud import WordCloud
import random
from utils.query import querys
from utils.utils import typeList
import matplotlib
import re
matplotlib.use('Agg') # 使用非交互式后端
import matplotlib.pyplot as plt
# 评论
def getImageByComments(comments):
    """Render a word cloud from comment texts and save it as a PNG.

    The ``'content'`` value of every comment is concatenated, segmented
    with ``jieba.cut``, stripped of single-character tokens and drawn with
    ``WordCloud``. The figure is written under ``./static/img/`` using a
    random integer as the file name.

    Args:
        comments: Iterable of mappings, each exposing a ``'content'`` key
            whose value is the comment text (a string).

    Returns:
        The path of the saved image, ``./static/img/<random int>.png``.
    """
    # Join once instead of growing a string with ``+`` inside a loop.
    text = ''.join(comment['content'] for comment in comments)

    # Segment the text and join the tokens with spaces so they can be
    # split and filtered word by word.
    segmented = ' '.join(jieba.cut(text))

    # Drop tokens that are a single character long.
    filtered_string = ' '.join(
        word for word in segmented.split() if len(word) > 1
    )

    # The mask image used to be opened here although the ``mask`` option is
    # disabled; that load only leaked a file handle, so it was removed.
    wc = WordCloud(
        background_color='white',
        # Forward slash: the previous '.\...' literal relied on an
        # unrecognised backslash escape and only resolved on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    image_path = f'./static/img/{random.randint(1, 100000000)}.png'

    plt.figure(1)
    try:
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig(image_path)
    finally:
        # Always release the figure, even when drawing or saving fails,
        # so the next call does not draw on top of a stale figure.
        plt.close()
    return image_path
# 标题
def getImageByAuthor(field, targetImage, resImage, exclude_words=None):
    """Render a word cloud from one column of the ``movie`` table.

    Every non-``None`` value of ``field`` is concatenated, segmented with
    ``jieba.cut``, optionally cleaned of excluded patterns, stripped of
    single-character tokens and drawn with ``WordCloud``.

    Args:
        field: Column name interpolated into ``select <field> from movie``.
            It is inserted into the SQL text verbatim, so it must come
            from trusted code and never from user input.
        targetImage: Path of the mask image. The ``mask`` option is
            currently disabled, so the argument is kept only for backward
            compatibility and is not read.
        resImage: Path the rendered image is saved to.
        exclude_words: Optional iterable of patterns removed from the
            segmented text. Each entry is passed to ``re.sub`` and is
            therefore interpreted as a regular expression.

    Returns:
        None. The image is written to ``resImage``.
    """
    sql = 'select {} from movie'.format(field)
    rows = querys(sql, [], 'select')

    # Only the first column of each row is used; NULL values are skipped.
    text = ''.join(row[0] for row in rows if row[0] is not None)

    # Segment the text and join the tokens with spaces.
    segmented = ' '.join(jieba.cut(text))

    # Remove the excluded patterns, if any were supplied.
    for pattern in exclude_words or ():
        segmented = re.sub(pattern, '', segmented)

    # Drop tokens that are a single character long.
    filtered_string = ' '.join(
        word for word in segmented.split() if len(word) > 1
    )

    # The mask image used to be opened here although the ``mask`` option is
    # disabled; that load only leaked a file handle, so it was removed.
    wc = WordCloud(
        background_color='white',
        # Forward slash: the previous '.\...' literal relied on an
        # unrecognised backslash escape and only resolved on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    plt.figure(1)
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.savefig(resImage)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()
# Build the title word cloud when the module is loaded; no keywords are
# excluded for now.
exclude_words = []
getImageByAuthor(
    field='title',
    targetImage='./static/img/2.png',
    resImage='./static/img/title_cloud.png',
    exclude_words=exclude_words,
)
def getCastsDataTop():
    """Return the 100 most frequent ``casts`` values and their counts.

    The values come from ``typeList('casts')`` (presumably actor names —
    confirm against ``utils.utils.typeList``).

    Returns:
        A ``(row, columns)`` tuple of two parallel lists: ``row`` holds the
        values ordered by descending frequency and ``columns`` holds the
        matching occurrence counts. At most 100 entries are returned.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    counts = Counter(typeList('casts'))

    # sorted() is stable, so values with equal counts keep the order in
    # which they were first seen.
    top = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:100]

    row = [name for name, _ in top]
    columns = [count for _, count in top]
    return row, columns
# 演员
# def getImageByCasts(targetImage, resImage):
# # 假设getCastsDataTop20()返回演员名单和频率列表
# castsList, castsFrequency = getCastsDataTop()
# # print(castsList, castsFrequency)
# # 将演员名单和频率结合起来,生成一个用于生成词云的文本字符串
# text = ' '.join([name + ' ' * freq for name, freq in zip(castsList, castsFrequency)])
#
# # 打开目标图片并获取其形状
# img = Image.open(targetImage)
# img_arr = np.array(img)
#
# # 创建词云对象
# wc = WordCloud(
# background_color='white',
# # mask=img_arr,
# font_path='STHUPO.TTF'
# )
#
# # 生成词云
# wc.generate_from_text(text)
#
# # 绘制词云图
# flg = plt.figure(1)
# plt.imshow(wc)
# plt.axis('off')
#
# # 保存词云图
# randomInt = random.randint(1, 100000000)
# plt.savefig(resImage)
#
# # 关闭绘图
# plt.close()
# 演员
def getImageByCasts(targetImage, resImage):
    """Render a word cloud of the most frequent cast entries.

    Frequencies come from ``getCastsDataTop()`` and are passed to
    ``WordCloud.generate_from_frequencies``, so each entry is sized by how
    often it occurs.

    Args:
        targetImage: Path of the mask image. The ``mask`` option is
            currently disabled, so the argument is kept only for backward
            compatibility and is not read.
        resImage: Path the rendered image is saved to.

    Returns:
        None. The image is written to ``resImage``.
    """
    castsList, castsFrequency = getCastsDataTop()

    # Map every entry to its occurrence count.
    frequency_dict = dict(zip(castsList, castsFrequency))

    # The mask image used to be opened here although the ``mask`` option is
    # disabled; that load only leaked a file handle, so it was removed.
    wc = WordCloud(
        background_color='white',
        # Forward slash: the previous '.\...' literal relied on an
        # unrecognised backslash escape and only resolved on Windows.
        font_path='./飞波正点体.otf',
        max_font_size=100,  # upper bound for the font size
        font_step=1,  # step between font sizes
        random_state=30,  # fixed seed for the random layout and colours
        max_words=200,  # maximum number of words drawn
    )
    wc.generate_from_frequencies(frequency_dict)

    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')  # hide the axes
        # NOTE(review): the original comment says bbox_inches=0 removes the
        # surrounding whitespace, but 0 does not appear to be a documented
        # value (the documented forms are 'tight' or a Bbox) — confirm
        # whether bbox_inches='tight' was intended. Kept as-is so the
        # output image is unchanged.
        plt.savefig(resImage, bbox_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()
# Generate the cast and summary word clouds when the module is loaded,
# then report success.
getImageByCasts(
    targetImage='./static/img/2.png',
    resImage='./static/img/cloud_cloud.png',
)
getImageByAuthor(
    field='summary',
    targetImage='./static/img/2.png',
    resImage='./static/img/summary_cloud.png',
)
print('生成词云图成功!')
# def getImageByComments(comments):
# text = ''
# for i in comments:
# text = text + i['content']
#
# # 分词
# cut = jieba.cut(text)
# string = ' '.join(cut)
#
# img = Image.open('./static/img/2.png')
# img_arr = np.array(img)
# wc = WordCloud(
# background_color='white',
# mask=img_arr,
# font_path='STHUPO.TTF'
# )
# wc.generate_from_text(string)
#
# # 绘制图片
# flg = plt.figure(1)
# plt.imshow(wc)
# plt.axis('off')
#
# randomInt = random.randint(1, 100000000)
# plt.savefig(f'./static/img/{randomInt}.png')
# return f'./static/img/{randomInt}.png'
#
#
# def getImageByAuthor(field, targetImage, resImage):
# sql = 'select {} from movie'.format(field)
# data = querys(sql, [], 'select')
# text = ''
# for i in data:
# text = text + i[0]
#
# # 分词
# cut = jieba.cut(text)
# string = ' '.join(cut)
#
# img = Image.open(targetImage)
# img_arr = np.array(img)
# wc = WordCloud(
# background_color='white',
# mask=img_arr,
# font_path='STHUPO.TTF'
# )
# wc.generate_from_text(string)
#
# # 绘制图片
# flg = plt.figure(1)
# plt.imshow(wc)
# plt.axis('off')
#
# randomInt = random.randint(1, 100000000)
# plt.savefig(resImage)
#
#
# getImageByAuthor('title', './static/img/2.png', './static/img/title_cloud.png')
# getImageByAuthor('summary', './static/img/2.png', './static/img/summary_cloud.png')
#
#
# def getImageByCasts(targetImage, resImage):
# castsList = typeList('casts')
# text = ''
# for i in castsList:
# text = text + i
#
# # 分词
# cut = jieba.cut(text)
# string = ' '.join(cut)
#
# img = Image.open(targetImage)
# img_arr = np.array(img)
# wc = WordCloud(
# background_color='white',
# mask=img_arr,
# font_path='STHUPO.TTF'
# )
# wc.generate_from_text(string)
#
# # 绘制图片
# flg = plt.figure(1)
# plt.imshow(wc)
# plt.axis('off')
#
# randomInt = random.randint(1, 100000000)
# plt.savefig(resImage)
#
#
# getImageByCasts('./static/img/2.png', './static/img/cloud_cloud.png')