You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
2.1 KiB
59 lines
2.1 KiB
2 months ago
|
import pandas as pd
|
||
|
from PIL import Image
|
||
|
import numpy as np
|
||
|
import jieba
|
||
|
from wordcloud import WordCloud
|
||
|
import matplotlib.pyplot as plt
|
||
|
|
||
|
|
||
|
def generate_beautiful_wordcloud(file_name='ai_danmaku_statistics.xlsx', mask_image_path=None, font_path=r"C:\Users\wyk93\Desktop\苹方黑体-准-简.ttf"):
    """Render a word cloud from the danmaku text stored in an Excel workbook.

    The '弹幕内容' column is concatenated into one string, segmented with
    jieba in full mode (``cut_all=True``) and passed to ``WordCloud``.  When
    a mask image is supplied, the cloud is shaped by a black/white version
    of it and drawn semi-transparently over the original picture.  Without a
    mask, the cloud is displayed on its own.

    Args:
        file_name: Path of the Excel workbook read with ``pd.read_excel``.
        mask_image_path: Optional path of an image used both as the shape
            mask and as the backdrop.  A falsy value (the default ``None``)
            disables both.
        font_path: Font file handed to ``WordCloud``; a font with CJK glyphs
            is needed for Chinese text to render.

    Returns:
        None.  The figure is shown with ``plt.show()``.
    """
    # Read the Excel file.
    df = pd.read_excel(file_name)

    # Merge every danmaku entry into one large string, skipping empty cells.
    text = ' '.join(df['弹幕内容'].dropna().astype(str))

    # Segment with jieba; WordCloud expects whitespace-separated tokens.
    segmented_text = " ".join(jieba.cut(text, cut_all=True))

    # Load the custom shape image once.  The previous code opened the file
    # twice, never closed it, and opened it even when no path was given,
    # which made the default call (mask_image_path=None) crash.
    if mask_image_path:
        with Image.open(mask_image_path) as source_image:
            # copy() forces the pixel data to load so the backdrop stays
            # usable after the file handle is closed.
            background_image = source_image.copy()
            gray = np.array(source_image.convert('L'))
        # Binarise: pixels brighter than 200 become 255, the rest become 0.
        mask_image = np.where(gray > 200, 255, 0).astype(np.uint8)
    else:
        background_image = None
        mask_image = None

    # Build the word cloud.
    # NOTE(review): contour_color=None combined with contour_width=2 is kept
    # from the original; confirm the intended outline colour.
    wordcloud = WordCloud(
        background_color='white',  # opaque white canvas (not transparent)
        mask=mask_image,
        contour_color=None,
        contour_width=2,
        width=2000,  # canvas width
        height=2000,  # canvas height
        colormap='viridis',
        max_words=20000,
        font_path=font_path,  # required to render Chinese glyphs
    ).generate(segmented_text)

    # Display the result.
    plt.figure(figsize=(10, 8))
    if background_image is not None:
        # Overlay the translucent cloud on top of the source picture.
        plt.imshow(background_image, interpolation='bilinear')
        plt.imshow(wordcloud, interpolation='bilinear', alpha=0.4)
    else:
        # No backdrop to blend with, so draw the cloud fully opaque.
        plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes
    plt.show()
|
||
|
|
||
|
# Script entry: generate the word cloud.
# Optional: shape it with a custom mask image.
generate_beautiful_wordcloud(
    mask_image_path=r"C:\Users\wyk93\Desktop\图片素材\sucai3.jpg",
)
|