You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
2.1 KiB

import pandas as pd
from PIL import Image
import numpy as np
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def generate_beautiful_wordcloud(
    file_name='ai_danmaku_statistics.xlsx',
    mask_image_path=None,
    font_path=r"C:\Users\wyk93\Desktop\苹方黑体-准-简.ttf",
):
    """Build and display a word cloud from the danmaku column of an Excel file.

    The '弹幕内容' column of the workbook is concatenated, segmented with
    jieba (full mode) and rendered with ``WordCloud``. When a mask image is
    given, it is binarised into a shape mask and is also drawn underneath
    the semi-transparent word cloud as a background.

    Args:
        file_name: Path of the Excel workbook to read.
        mask_image_path: Optional path of an image used both as the shape
            mask and as the background picture. When falsy, the cloud is
            drawn on its own, unmasked and fully opaque.
        font_path: Path of a font file passed to ``WordCloud``; a font with
            CJK glyphs is needed for Chinese text to render.

    Raises:
        KeyError: If the workbook has no '弹幕内容' column.
    """
    # Read the Excel file and merge every danmaku entry into one string.
    df = pd.read_excel(file_name)
    text = ' '.join(df['弹幕内容'].dropna().astype(str))

    # Segment the text with jieba; WordCloud splits on whitespace.
    segmented_text = " ".join(jieba.cut(text, cut_all=True))

    mask_image = None
    background_image = None
    if mask_image_path:
        # Open the file once and release the handle as soon as both the
        # background copy and the grayscale array have been materialised.
        with Image.open(mask_image_path) as source_image:
            background_image = source_image.copy()
            gray = np.array(source_image.convert('L'))
        # Binarise: pixels brighter than 200 become 255, the rest become 0.
        mask_image = np.where(gray > 200, 255, 0).astype(np.uint8)

    # Build the word cloud. min_font_size / max_font_size /
    # prefer_horizontal are intentionally left at the library defaults.
    wordcloud = WordCloud(
        background_color='white',  # white canvas (not transparent)
        mask=mask_image,
        contour_color=None,
        contour_width=2,
        width=2000,   # canvas width
        height=2000,  # canvas height
        colormap='viridis',
        max_words=20000,
        font_path=font_path,  # needed to render Chinese glyphs
    ).generate(segmented_text)

    # Show the result. The background picture only exists when a mask image
    # was supplied; previously Image.open(None) was called unconditionally
    # and the function crashed for the default mask_image_path=None.
    plt.figure(figsize=(10, 8))
    if background_image is not None:
        plt.imshow(background_image, interpolation='bilinear')
        overlay_alpha = 0.4  # let the background show through the cloud
    else:
        overlay_alpha = 1.0  # nothing underneath, so draw fully opaque
    plt.imshow(wordcloud, interpolation='bilinear', alpha=overlay_alpha)
    plt.axis('off')  # hide the axes
    plt.show()
# Run the generator to produce the word cloud.
# Optional: a custom image supplies the shape mask (and the background).
MASK_IMAGE_PATH = r"C:\Users\wyk93\Desktop\图片素材\sucai3.jpg"
generate_beautiful_wordcloud(mask_image_path=MASK_IMAGE_PATH)