import pandas as pd
from PIL import Image
import numpy as np
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt


def _load_background_and_mask(mask_image_path, threshold=200):
    """Load the shape image once and derive a binary word-cloud mask from it.

    Args:
        mask_image_path: Path of the image that defines the cloud shape.
        threshold: Grayscale cut-off; pixels brighter than this become 255,
            all others become 0.

    Returns:
        A ``(background, mask)`` tuple. ``background`` is an in-memory copy of
        the image as opened; ``mask`` is a ``uint8`` array holding only the
        values 0 and 255.
    """
    # Copy inside the ``with`` block so the file handle is released while the
    # pixel data stays usable afterwards.
    with Image.open(mask_image_path) as source:
        background = source.copy()

    gray = np.array(background.convert('L'))
    # Per the WordCloud documentation, pure-white mask entries are treated as
    # "masked out", so bright pixels are pushed to 255 and the rest to 0.
    mask = np.where(gray > threshold, 255, 0).astype(np.uint8)
    return background, mask


def generate_beautiful_wordcloud(
    file_name='ai_danmaku_statistics.xlsx',
    mask_image_path=None,
    font_path=r"C:\Users\wyk93\Desktop\苹方黑体-准-简.ttf",
):
    """Build a word cloud from the danmaku column of an Excel file and show it.

    The '弹幕内容' column of ``file_name`` is joined into one string, segmented
    with jieba (full mode), rendered with ``WordCloud`` and displayed with
    matplotlib. When ``mask_image_path`` is given, the image both shapes the
    cloud and is drawn underneath it; the cloud is then overlaid
    semi-transparently. Without a mask image only the cloud is shown.

    Args:
        file_name: Excel workbook to read; it must contain a '弹幕内容' column.
        mask_image_path: Optional path of the shape/background image.
        font_path: Font file passed to ``WordCloud``; a font with CJK glyphs
            is needed for Chinese text to render.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    df = pd.read_excel(file_name)

    # Merge every non-null comment into one string, then segment it so that
    # WordCloud receives space-separated tokens.
    text = ' '.join(df['弹幕内容'].dropna().astype(str))
    segmented_text = " ".join(jieba.cut(text, cut_all=True))

    # The background can only be loaded when a path was actually supplied;
    # opening it unconditionally crashed for the default ``None``.
    if mask_image_path:
        background_image, mask_image = _load_background_and_mask(mask_image_path)
    else:
        background_image = None
        mask_image = None

    wordcloud = WordCloud(
        background_color='white',  # opaque white canvas (not transparent)
        mask=mask_image,
        contour_color=None,
        contour_width=2,
        # Per the WordCloud docs, width/height are ignored when a mask is set
        # and the mask's shape is used instead.
        width=2000,
        height=2000,
        colormap='viridis',
        max_words=20000,
        font_path=font_path,
    ).generate(segmented_text)

    plt.figure(figsize=(10, 8))
    if background_image is not None:
        # Draw the source image first, then blend the cloud on top of it.
        plt.imshow(background_image, interpolation='bilinear')
        plt.imshow(wordcloud, interpolation='bilinear', alpha=0.4)
    else:
        plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


if __name__ == "__main__":
    # Example run using a custom shape image as the mask.
    generate_beautiful_wordcloud(
        mask_image_path=r"C:\Users\wyk93\Desktop\图片素材\sucai3.jpg"
    )