You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.5 KiB

"""
生成基于弹幕数据的词云图
"""
import pandas as pd
import numpy as np
import wordcloud
from matplotlib.image import imread
import jieba
from concurrent.futures import ThreadPoolExecutor
def blue_color_func(_random_state=None, **_kwargs):
    """Return a blue HSL colour string with a random lightness.

    Meant to be used as the ``color_func`` callback of
    ``wordcloud.WordCloud``. Hue (210) and saturation (100%) are fixed; only
    the lightness varies, drawn from the integers 50..89.

    Args:
        _random_state: Not used for its value unless it is a
            ``random.Random``. NOTE(review): WordCloud appears to pass the
            word as the first positional argument, so a string normally
            lands here -- confirm against the wordcloud documentation.
        **_kwargs: Remaining callback arguments. If ``random_state`` is a
            ``random.Random`` instance it is used as the randomness source,
            so a seeded caller gets reproducible colours. Every other key
            is ignored.

    Returns:
        A string of the form ``"hsl(210, 100%, N%)"``.
    """
    import random

    rng = _kwargs.get("random_state", _random_state)
    if isinstance(rng, random.Random):
        # random.Random.randint includes both ends, np.random.randint
        # excludes the upper one; 89 keeps the range identical (50..89).
        lightness = rng.randint(50, 89)
    else:
        lightness = np.random.randint(50, 90)
    return f"hsl(210, 100%, {lightness}%)"
def process_text(danmu_list):
    """Segment every danmu message and merge the tokens into one string.

    Each message is tokenised with ``jieba.lcut`` through a thread pool.
    Tokens within a message are joined by single spaces, and the
    per-message results are joined by single spaces in input order.

    Args:
        danmu_list: Iterable of message strings.

    Returns:
        One space-separated string of all tokens; ``""`` for empty input.
    """
    def _segment(message):
        # Turn one message into its space-separated tokens.
        return ' '.join(jieba.lcut(message))

    with ThreadPoolExecutor() as pool:
        segmented = list(pool.map(_segment, danmu_list))
    return ' '.join(segmented)
def wordcloud_generation(danmu_data, stopwords, output_path, *,
                         mask_path="/output/OIP.jpg", font_path='msyhl.ttc'):
    """Render a word cloud from danmu text and save it as an image file.

    Args:
        danmu_data: Table with a ``'danmu'`` column. Missing values are
            dropped and the remaining entries are converted to ``str``.
        stopwords: Collection of words passed to ``WordCloud`` as
            ``stopwords``.
        output_path: Destination path handed to ``WordCloud.to_file``.
        mask_path: Image read with ``imread`` and used as the cloud mask.
            Defaults to the previously hard-coded ``"/output/OIP.jpg"``;
            pass ``None`` to render without a mask.
        font_path: Font file used to draw the words. Defaults to the
            previously hard-coded ``'msyhl.ttc'``.

    Returns:
        None. The image is written to ``output_path``.
    """
    messages = danmu_data['danmu'].dropna().astype(str).tolist()
    text = process_text(messages)

    mask = imread(mask_path) if mask_path is not None else None

    cloud = wordcloud.WordCloud(
        stopwords=stopwords,
        # NOTE(review): per the wordcloud docs, width/height are ignored
        # when a mask is supplied (the mask's shape is used) -- confirm.
        width=1920,
        height=1200,
        background_color='white',
        font_path=font_path,
        mask=mask,
        max_words=100,
        color_func=blue_color_func,
    )
    cloud.generate(text)
    cloud.to_file(output_path)
def main():
    """Load the danmu spreadsheet and generate the word cloud image.

    Reads sheet ``Sheet1`` of the hard-coded Excel file, builds the
    stop-word set, writes the word cloud to
    ``/output/danmu_dwordcloud.png`` and prints a completion message.
    """
    danmu_frame = pd.read_excel(
        'E:/Crawler/output/Top8_Danmu.xlsx', sheet_name='Sheet1'
    )
    # NOTE(review): the original literal repeated '' fifteen times, which is
    # the same set as {''}. The intended stop words look like they were lost
    # somewhere upstream -- TODO restore the real entries.
    stopwords = {''}
    wordcloud_generation(danmu_frame, stopwords, '/output/danmu_dwordcloud.png')
    print("词云图生成完成!")
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()