"""Generate a blue-toned word cloud image from danmu (bullet-comment) text.

The script loads the 'danmu' column of an Excel sheet, segments the text
with jieba, and renders a masked word cloud to a PNG file.
"""
# Provenance: introduced as a_wordcloud.py in commit
# 66e551b8c3d716485dc04d42c962ccaba0070e48
# ("feat: generate word cloud image (AI keyword version)").

import pandas as pd
import numpy as np
import wordcloud
from matplotlib.image import imread
import jieba


def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a blue HSL colour string with a random lightness of 50-89 %.

    The signature follows the ``color_func`` callback contract of
    ``wordcloud.WordCloud``; only ``random_state`` is actually used.

    Args:
        word: The word being coloured (unused).
        font_size: Font size chosen for the word (unused).
        position: Position of the word on the canvas (unused).
        orientation: Orientation of the word (unused).
        random_state: Optional ``random.Random``-like generator. WordCloud
            passes its own generator here; drawing from it keeps colours
            reproducible when the caller seeds the WordCloud.
        **kwargs: Extra callback arguments (e.g. ``font_path``), ignored.

    Returns:
        A string such as ``"hsl(210, 100%, 73%)"``.
    """
    if random_state is not None:
        # random.Random.randint is inclusive on both ends, so 89 matches the
        # exclusive upper bound of 90 used in the NumPy fallback below.
        lightness = random_state.randint(50, 89)
    else:
        lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"


# Load the input data.
# Alternative input used during development: 'All_Danmu.xlsx' (same sheet name).
dm = pd.read_excel('Top8_Danmu.xlsx', sheet_name='Sheet1')

# Custom stop-word list: single-character words excluded from the cloud.
my_stopwords = ['我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也']


def wordcloud_generation(
    dm,
    *,
    mask_path="OIP.jpg",
    font_path='msyhl.ttc',
    output_path='danmu_dwordcloud.png',
    max_words=100,
):
    """Render a word cloud from the 'danmu' column of *dm* and save it.

    Args:
        dm: A pandas DataFrame with a 'danmu' column holding the comment
            text. Missing values are dropped and the rest cast to ``str``.
        mask_path: Image file whose shape constrains where words are drawn.
        font_path: Font file used to render the words.
        output_path: Destination path of the generated image.
        max_words: Maximum number of words placed in the cloud.

    Raises:
        KeyError: If *dm* has no 'danmu' column.
        ValueError: If the 'danmu' column contains no usable text.
    """
    dm_list = dm['danmu'].dropna().astype(str).tolist()
    if not dm_list:
        # Fail early with a clear message instead of deep inside WordCloud.
        raise ValueError("column 'danmu' contains no text to build a word cloud from")

    dm_string = ' '.join(dm_list)  # all comments as one string
    # WordCloud tokenises on word boundaries, so the text is segmented with
    # jieba first and the tokens re-joined with spaces.
    dmreal_string = ' '.join(jieba.lcut(dm_string))
    img = imread(mask_path)

    wc = wordcloud.WordCloud(
        stopwords=set(my_stopwords),
        # Per the wordcloud documentation, width/height are ignored when a
        # mask is supplied (the mask's shape is used instead); they are kept
        # so the canvas size stays defined if the mask is ever removed.
        width=1920,
        height=1200,
        background_color='white',
        font_path=font_path,
        mask=img,
        max_words=max_words,
        color_func=blue_color_func,
    ).generate(dmreal_string)
    wc.to_file(output_path)


# Generate the word cloud.
wordcloud_generation(dm)