parent
885032c2c7
commit
66e551b8c3
@ -0,0 +1,38 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import wordcloud
|
||||||
|
from matplotlib.image import imread
|
||||||
|
import jieba
|
||||||
|
|
||||||
|
# Blue colour palette for the word cloud.
def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a blue HSL colour string with a randomly chosen lightness.

    The signature matches the ``color_func`` callback that is handed to
    ``wordcloud.WordCloud`` elsewhere in this file. ``word``, ``font_size``,
    ``position``, ``orientation`` and any extra keyword arguments are
    accepted for compatibility with that callback but do not influence the
    result.

    Args:
        word: The word being drawn (unused).
        font_size: Font size of the word (unused).
        position: Position of the word (unused).
        orientation: Orientation of the word (unused).
        random_state: Optional random generator. When given, it is used to
            pick the lightness so that a seeded generator yields a
            reproducible colour sequence. It is assumed to follow the
            ``random.Random`` API, i.e. ``randint(a, b)`` is inclusive of
            both ends.
        **kwargs: Ignored.

    Returns:
        A string of the form ``"hsl(210, 100%, L%)"`` where ``L`` is an
        integer in the range 50..89.
    """
    if random_state is None:
        # No generator supplied: fall back to NumPy's global generator
        # (upper bound is exclusive, so the range is 50..89).
        lightness = np.random.randint(50, 90)
    else:
        # Inclusive upper bound; 89 keeps the range identical to the fallback.
        lightness = random_state.randint(50, 89)
    return "hsl(210, 100%%, %d%%)" % lightness
|
||||||
|
|
||||||
|
# Load the danmu data from the Excel workbook.
# Alternative input (all danmu instead of the top 8):
# dm = pd.read_excel('All_Danmu.xlsx', sheet_name='Sheet1')
dm = pd.read_excel('Top8_Danmu.xlsx', sheet_name='Sheet1')

# Custom stop-word list; passed as `stopwords` to the WordCloud constructor.
my_stopwords = ['我', '你', '他', '这', '个', '是', '的', '了', '啊', '吗', '吧', '就', '都', '不', '也']
|
||||||
|
|
||||||
|
# Word-cloud generation.
def wordcloud_generation(dm, *, mask_path="OIP.jpg", font_path="msyhl.ttc",
                         output_path="danmu_dwordcloud.png", max_words=100):
    """Build a word cloud from the ``danmu`` column of *dm* and save it to disk.

    The non-null values of ``dm['danmu']`` are converted to strings, joined
    into one text, segmented with ``jieba.lcut`` and handed to
    ``wordcloud.WordCloud`` together with the module-level ``my_stopwords``
    list and the ``blue_color_func`` colour callback.

    Args:
        dm: Table-like object supporting ``dm['danmu']`` that yields a pandas
            Series (the caller in this file passes a DataFrame read from
            Excel).
        mask_path: Image file read with ``matplotlib.image.imread`` and
            passed as the ``mask`` argument of ``WordCloud``.
        font_path: Font file passed as ``font_path`` to ``WordCloud``.
        output_path: Destination passed to ``WordCloud.to_file``.
        max_words: Value passed as ``max_words`` to ``WordCloud``.

    Raises:
        KeyError: If *dm* has no ``danmu`` column.
        ValueError: If the ``danmu`` column contains no non-null values.
    """
    dm_list = dm['danmu'].dropna().astype(str).tolist()
    if not dm_list:
        # Fail early with a clear message rather than deep inside rendering.
        raise ValueError("no danmu text available to build a word cloud")

    dm_string = ' '.join(dm_list)                     # all danmu as one string
    dmreal_string = ' '.join(jieba.lcut(dm_string))   # space-separated tokens

    img = imread(mask_path)

    # Configure the word cloud and generate it from the segmented text.
    wc = wordcloud.WordCloud(
        stopwords=my_stopwords,
        width=1920,
        height=1200,
        background_color='white',
        font_path=font_path,
        mask=img,
        max_words=max_words,
        color_func=blue_color_func,
    ).generate(dmreal_string)
    wc.to_file(output_path)
|
||||||
|
|
||||||
|
# Run the word-cloud generation on the loaded danmu data.
wordcloud_generation(dm)
|
Loading…
Reference in new issue