feat:生成词云图(所有弹幕版)

main
ph3x54fsi 2 months ago
parent 6258d204d1
commit 8d22d0402e

@ -0,0 +1,61 @@
import pandas as pd
import numpy as np
import wordcloud
from matplotlib.image import imread
import jieba
import jieba.analyse as analyse
import re
# Blue colour palette for the word cloud.
def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a fully saturated blue HSL colour with a random lightness.

    The signature matches the ``color_func`` callback that this script hands
    to ``wordcloud.WordCloud``; only ``random_state`` influences the result.

    Args:
        word: Ignored; accepted for callback compatibility.
        font_size: Ignored; accepted for callback compatibility.
        position: Ignored; accepted for callback compatibility.
        orientation: Ignored; accepted for callback compatibility.
        random_state: Optional random source exposing ``randint(a, b)`` with
            inclusive bounds (for example ``random.Random``). When supplied it
            drives the lightness draw, so a seeded caller gets reproducible
            colours. When ``None``, NumPy's global generator is used.
        **kwargs: Ignored.

    Returns:
        A string of the form ``"hsl(210, 100%, L%)"`` with ``L`` in 50..89.
    """
    if random_state is None:
        # np.random.randint excludes the upper bound: 50..89.
        lightness = np.random.randint(50, 90)
    else:
        # Inclusive upper bound, so 89 keeps the same 50..89 range.
        lightness = random_state.randint(50, 89)
    return "hsl(210, 100%%, %d%%)" % lightness
# Normalise laughter runs ("哈哈哈...").
def normalize_hahaha(text):
    """Collapse every run of three or more "哈" into exactly "哈哈哈".

    Runs shorter than three characters and all other text are left untouched,
    so differently long laughter strings count as the same token later on.

    Args:
        text: The comment string to normalise.

    Returns:
        The normalised string.
    """
    # The quantifier needs an atom to repeat; a bare '{3,}' is rejected by
    # the re module with "nothing to repeat".
    return re.sub(r'哈{3,}', '哈哈哈', text)
# Load the danmu data from the first-named sheet of the Excel workbook.
dm = pd.read_excel(
    'All_Danmu.xlsx',
    sheet_name='Sheet1',
)
# Extended stopword list: words removed from the extracted keywords and also
# passed to WordCloud as its ``stopwords`` setting.
# NOTE(review): the literal below contains the empty string 25 times. A set
# keeps a single copy, so the effective contents are '', '不是', '哈哈哈',
# '呵呵', '嘿嘿' and '哎呀'. The repeated '' entries look like single-character
# stopwords that were lost somewhere -- TODO confirm against the author's list.
my_stopwords = set(['', '', '', '', '', '', '', '', '', '', '', '', '', '不是', '', '哈哈哈',
'', '', '', '', '', '', '', '呵呵', '', '嘿嘿', '哎呀', '', '', ''])
# Word-cloud generation.
def wordcloud_generation(dm):
    """Render a word cloud of the danmu keywords and save it as a PNG.

    Steps: read the ``danmu`` column, normalise laughter runs, extract the top
    100 TF-IDF keywords with jieba, drop stopwords, draw the cloud inside the
    ``OIP.jpg`` mask and write ``alldanmu_dwordcloud.png``.

    The keyword weights are kept and handed to ``generate_from_frequencies``
    so that a word's size reflects its TF-IDF weight. Joining the bare
    keywords into a string and calling ``generate()`` would give every
    keyword a count of one and throw the ranking away.

    Args:
        dm: Table-like object whose ``danmu`` column holds the comment text
            (the DataFrame loaded by this script).

    Raises:
        ValueError: If no keyword is left after stopword filtering.
    """
    comments = dm['danmu'].dropna().astype(str).tolist()
    # Normalise each comment, then build one corpus string for jieba.
    corpus = ' '.join(normalize_hahaha(text) for text in comments)

    # TF-IDF keyword extraction; withWeight=True yields (word, weight) pairs.
    weighted_keywords = analyse.extract_tags(
        corpus, topK=100, withWeight=True, allowPOS=()
    )
    # Filter here: generate_from_frequencies draws the mapping as given.
    frequencies = {
        word: weight
        for word, weight in weighted_keywords
        if word not in my_stopwords
    }
    if not frequencies:
        raise ValueError(
            "No keywords left after stopword filtering; cannot draw a word cloud."
        )

    mask_image = imread("OIP.jpg")
    cloud = wordcloud.WordCloud(
        # Kept for parity with the previous configuration; the keywords are
        # already filtered above.
        stopwords=my_stopwords,
        # NOTE(review): the wordcloud docs state width/height are ignored when
        # a mask is supplied; kept in case the mask is removed later.
        width=1920,
        height=1200,
        background_color='white',
        font_path='msyhl.ttc',
        mask=mask_image,
        max_words=100,
        color_func=blue_color_func,
    ).generate_from_frequencies(frequencies)
    cloud.to_file('alldanmu_dwordcloud.png')
# Run the word-cloud generation on the loaded danmu table.
wordcloud_generation(dm=dm)
Loading…
Cancel
Save