You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
292 lines
7.7 KiB
292 lines
7.7 KiB
6 months ago
|
import jieba
|
||
|
from PIL import Image
|
||
|
import numpy as np
|
||
|
from wordcloud import WordCloud
|
||
|
import random
|
||
|
from utils.query import querys
|
||
|
from utils.utils import typeList
|
||
|
import matplotlib
|
||
|
import re
|
||
|
|
||
|
matplotlib.use('Agg') # 使用非交互式后端
|
||
|
import matplotlib.pyplot as plt
|
||
|
|
||
|
|
||
|
# Comments word cloud
|
||
|
def getImageByComments(comments):
    """Render a word cloud from comment texts and return the saved image path.

    The ``'content'`` value of every comment is concatenated, segmented with
    jieba, stripped of single-character tokens and drawn with WordCloud.
    The shape mask is disabled, so no template image is loaded.

    Args:
        comments: Iterable of mappings; each item must provide a string
            under the ``'content'`` key.

    Returns:
        The relative path ``./static/img/<random int>.png`` of the PNG that
        was written.

    Note:
        When no token longer than one character remains, WordCloud is
        expected to raise ``ValueError`` (assumption about the library;
        this function does not catch it).
    """
    # Patterns to strip from the segmented text. Each entry is handed to
    # re.sub as a regular expression. Empty by default.
    exclude_words_pl = []

    # str.join avoids the quadratic cost of repeated `text = text + ...`.
    text = ''.join(item['content'] for item in comments)

    # Segment the text; tokens are space-separated so they can be split again.
    string = ' '.join(jieba.cut(text))

    # Remove the excluded keywords.
    for word in exclude_words_pl:
        string = re.sub(word, '', string)

    # Drop tokens that are a single character long.
    filtered_string = ' '.join(word for word in string.split() if len(word) > 1)

    wc = WordCloud(
        background_color='white',
        # Forward slash: the former '.\...' literal contained an invalid
        # escape sequence and only resolved on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    randomInt = random.randint(1, 100000000)
    image_path = f'./static/img/{randomInt}.png'

    # Draw on a dedicated figure and always close it, even if saving fails,
    # so figures do not pile up in pyplot's global registry.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wc)
        ax.axis('off')
        fig.savefig(image_path)
    finally:
        plt.close(fig)

    return image_path
|
||
|
|
||
|
|
||
|
# Title word cloud (also used for other movie columns such as summary)
|
||
|
def getImageByAuthor(field, targetImage, resImage, exclude_words=None):
    """Render a word cloud from one column of the ``movie`` table.

    Runs ``select <field> from movie``, concatenates the first element of
    every returned row (skipping ``None``), segments the text with jieba,
    drops single-character tokens and saves the rendered cloud.

    Args:
        field: Column expression interpolated verbatim into the SQL text.
            It cannot be bound as a query parameter, so it must come from
            trusted code and never from user input.
        targetImage: Path of the mask/template image. Currently unused
            because the ``mask`` option is disabled; kept so existing
            callers keep working.
        resImage: Path the rendered image is written to.
        exclude_words: Optional iterable of patterns removed from the
            segmented text. Each entry is handed to ``re.sub`` as a regular
            expression.

    Returns:
        None. The image is written to ``resImage``.
    """
    sql = 'select {} from movie'.format(field)
    data = querys(sql, [], 'select')

    # str.join avoids the quadratic cost of repeated `text = text + ...`.
    text = ''.join(row[0] for row in data if row[0] is not None)

    # Segment the text; tokens are space-separated so they can be split again.
    string = ' '.join(jieba.cut(text))

    # Remove the excluded keywords.
    if exclude_words:
        for word in exclude_words:
            string = re.sub(word, '', string)

    # Drop tokens that are a single character long.
    filtered_string = ' '.join(word for word in string.split() if len(word) > 1)

    wc = WordCloud(
        background_color='white',
        # Forward slash: the former '.\...' literal contained an invalid
        # escape sequence and only resolved on Windows.
        font_path='./飞波正点体.otf',
    )
    wc.generate_from_text(filtered_string)

    # Draw on a dedicated figure and always close it, even if saving fails,
    # so figures do not pile up in pyplot's global registry.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wc, interpolation='bilinear')
        ax.axis('off')
        fig.savefig(resImage)
    finally:
        plt.close(fig)
|
||
|
|
||
|
|
||
|
# Build the title word cloud when the module is loaded; no keywords are
# excluded.
exclude_words = list()
getImageByAuthor(
    field='title',
    targetImage='./static/img/2.png',
    resImage='./static/img/title_cloud.png',
    exclude_words=exclude_words,
)
|
||
|
|
||
|
|
||
|
def getCastsDataTop():
    """Count the entries of ``typeList('casts')`` and return the top 100.

    Returns:
        A ``(row, columns)`` pair of parallel lists: ``row`` holds the
        entries ordered by descending occurrence count and ``columns`` the
        matching counts. At most 100 entries are returned.
    """
    tally = {}
    for entry in typeList('casts'):
        tally[entry] = tally.get(entry, 0) + 1

    # Stable sort by count, highest first, then keep the first 100 pairs.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    top = ranked[:100]

    row = [entry for entry, _ in top]
    columns = [count for _, count in top]
    return row, columns
|
||
|
|
||
|
|
||
|
# 演员
|
||
|
# def getImageByCasts(targetImage, resImage):
|
||
|
# # 假设getCastsDataTop20()返回演员名单和频率列表
|
||
|
# castsList, castsFrequency = getCastsDataTop()
|
||
|
# # print(castsList, castsFrequency)
|
||
|
# # 将演员名单和频率结合起来,生成一个用于生成词云的文本字符串
|
||
|
# text = ' '.join([name + ' ' * freq for name, freq in zip(castsList, castsFrequency)])
|
||
|
#
|
||
|
# # 打开目标图片并获取其形状
|
||
|
# img = Image.open(targetImage)
|
||
|
# img_arr = np.array(img)
|
||
|
#
|
||
|
# # 创建词云对象
|
||
|
# wc = WordCloud(
|
||
|
# background_color='white',
|
||
|
# # mask=img_arr,
|
||
|
# font_path='STHUPO.TTF'
|
||
|
# )
|
||
|
#
|
||
|
# # 生成词云
|
||
|
# wc.generate_from_text(text)
|
||
|
#
|
||
|
# # 绘制词云图
|
||
|
# flg = plt.figure(1)
|
||
|
# plt.imshow(wc)
|
||
|
# plt.axis('off')
|
||
|
#
|
||
|
# # 保存词云图
|
||
|
# randomInt = random.randint(1, 100000000)
|
||
|
# plt.savefig(resImage)
|
||
|
#
|
||
|
# # 关闭绘图
|
||
|
# plt.close()
|
||
|
|
||
|
# Cast (actors) word cloud
|
||
|
def getImageByCasts(targetImage, resImage):
    """Render a word cloud weighted by how often each cast entry occurs.

    The names and counts come from ``getCastsDataTop()`` and are passed to
    WordCloud as a frequency mapping, so no text segmentation is involved.

    Args:
        targetImage: Path of the mask/template image. Currently unused
            because the ``mask`` option is disabled; kept so existing
            callers keep working.
        resImage: Path the rendered image is written to.

    Returns:
        None. The image is written to ``resImage``.
    """
    castsList, castsFrequency = getCastsDataTop()

    # Map each name to its count for frequency-based generation.
    frequency_dict = dict(zip(castsList, castsFrequency))

    wc = WordCloud(
        background_color='white',
        # Forward slash: the former '.\...' literal contained an invalid
        # escape sequence and only resolved on Windows.
        font_path='./飞波正点体.otf',
        max_font_size=100,  # largest font size used
        font_step=1,  # step between font sizes
        random_state=30,  # fixed seed for reproducible layout and colours
        max_words=200,  # upper bound on the number of words drawn
    )
    wc.generate_from_frequencies(frequency_dict)

    # Draw on a dedicated figure instead of whatever figure pyplot currently
    # considers active, and always close it, even if saving fails.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wc, interpolation='bilinear')
        ax.axis('off')  # hide the axes
        # bbox_inches=0 is falsy and was silently ignored; 'tight' with zero
        # padding is what actually trims the surrounding whitespace.
        fig.savefig(resImage, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)
|
||
|
|
||
|
|
||
|
# Build the cast and summary word clouds when the module is loaded.
getImageByCasts(
    targetImage='./static/img/2.png',
    resImage='./static/img/cloud_cloud.png',
)

# Disabled alternative for the title cloud:
# getImageByAuthor('title', './static/img/1.jpg', './static/img/title_cloud.png')
getImageByAuthor(
    field='summary',
    targetImage='./static/img/2.png',
    resImage='./static/img/summary_cloud.png',
)

print('生成词云图成功!')
|
||
|
|
||
|
# def getImageByComments(comments):
|
||
|
# text = ''
|
||
|
# for i in comments:
|
||
|
# text = text + i['content']
|
||
|
#
|
||
|
# # 分词
|
||
|
# cut = jieba.cut(text)
|
||
|
# string = ' '.join(cut)
|
||
|
#
|
||
|
# img = Image.open('./static/img/2.png')
|
||
|
# img_arr = np.array(img)
|
||
|
# wc = WordCloud(
|
||
|
# background_color='white',
|
||
|
# mask=img_arr,
|
||
|
# font_path='STHUPO.TTF'
|
||
|
# )
|
||
|
# wc.generate_from_text(string)
|
||
|
#
|
||
|
# # 绘制图片
|
||
|
# flg = plt.figure(1)
|
||
|
# plt.imshow(wc)
|
||
|
# plt.axis('off')
|
||
|
#
|
||
|
# randomInt = random.randint(1, 100000000)
|
||
|
# plt.savefig(f'./static/img/{randomInt}.png')
|
||
|
# return f'./static/img/{randomInt}.png'
|
||
|
#
|
||
|
#
|
||
|
# def getImageByAuthor(field, targetImage, resImage):
|
||
|
# sql = 'select {} from movie'.format(field)
|
||
|
# data = querys(sql, [], 'select')
|
||
|
# text = ''
|
||
|
# for i in data:
|
||
|
# text = text + i[0]
|
||
|
#
|
||
|
# # 分词
|
||
|
# cut = jieba.cut(text)
|
||
|
# string = ' '.join(cut)
|
||
|
#
|
||
|
# img = Image.open(targetImage)
|
||
|
# img_arr = np.array(img)
|
||
|
# wc = WordCloud(
|
||
|
# background_color='white',
|
||
|
# mask=img_arr,
|
||
|
# font_path='STHUPO.TTF'
|
||
|
# )
|
||
|
# wc.generate_from_text(string)
|
||
|
#
|
||
|
# # 绘制图片
|
||
|
# flg = plt.figure(1)
|
||
|
# plt.imshow(wc)
|
||
|
# plt.axis('off')
|
||
|
#
|
||
|
# randomInt = random.randint(1, 100000000)
|
||
|
# plt.savefig(resImage)
|
||
|
#
|
||
|
#
|
||
|
# getImageByAuthor('title', './static/img/2.png', './static/img/title_cloud.png')
|
||
|
# getImageByAuthor('summary', './static/img/2.png', './static/img/summary_cloud.png')
|
||
|
#
|
||
|
#
|
||
|
# def getImageByCasts(targetImage, resImage):
|
||
|
# castsList = typeList('casts')
|
||
|
# text = ''
|
||
|
# for i in castsList:
|
||
|
# text = text + i
|
||
|
#
|
||
|
# # 分词
|
||
|
# cut = jieba.cut(text)
|
||
|
# string = ' '.join(cut)
|
||
|
#
|
||
|
# img = Image.open(targetImage)
|
||
|
# img_arr = np.array(img)
|
||
|
# wc = WordCloud(
|
||
|
# background_color='white',
|
||
|
# mask=img_arr,
|
||
|
# font_path='STHUPO.TTF'
|
||
|
# )
|
||
|
# wc.generate_from_text(string)
|
||
|
#
|
||
|
# # 绘制图片
|
||
|
# flg = plt.figure(1)
|
||
|
# plt.imshow(wc)
|
||
|
# plt.axis('off')
|
||
|
#
|
||
|
# randomInt = random.randint(1, 100000000)
|
||
|
# plt.savefig(resImage)
|
||
|
#
|
||
|
#
|
||
|
# getImageByCasts('./static/img/2.png', './static/img/cloud_cloud.png')
|