# Python/word_cloud.py

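"""
    Description: write the most frequent AI-related bullet-screen comments to an
    Excel workbook and generate a word cloud image from them.
"""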
# import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import openpyxl
import jieba
import re
from bullet_screen import get_bullet_screen
from bv_maker import get_bv, BV_NUM
# Set jieba's logger to INFO level so that its DEBUG-level messages are not emitted.
jieba.setLogLevel(jieba.logging.INFO)

# AI-application keywords. Matching is done on lower-cased text, so the
# upper-case 'AI' entry can never match on its own; it is kept so the keyword
# list stays identical to the historical one.
_AI_KEYWORDS = ('人工智能', 'AI', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能', 'ai')

# Compiled once at import time instead of being rebuilt for every bullet.
# The look-behind / look-ahead reject a keyword that is directly preceded or
# followed by an ASCII letter, so "ai" inside words such as "main" or "said"
# is not counted.
# NOTE(review): the same ASCII-letter boundary also applies to the Chinese
# keywords (e.g. a keyword glued to a Latin letter is not matched at that
# position) -- confirm whether that is intended.
_AI_KEYWORD_PATTERN = re.compile(
    r'(?<![a-zA-Z])(?:'
    + '|'.join(re.escape(keyword) for keyword in _AI_KEYWORDS)
    + r')(?![a-zA-Z])'
)


def confirm(bullet: str) -> bool:
    """Return True if *bullet* mentions one of the AI-related keywords.

    The text is lower-cased before matching, which makes the check
    case-insensitive for the Latin keyword "ai".

    Args:
        bullet: Text of a single bullet-screen comment.

    Returns:
        True when at least one keyword occurs without an ASCII letter
        immediately before or after it, otherwise False.
    """
    return _AI_KEYWORD_PATTERN.search(bullet.lower()) is not None

def make_word_cloud(bullet_screen_list, *, top_n=8,
                    excel_path='ai_bullet_screen.xlsx', font_path='msyh.ttc'):
    """Export the most frequent AI-related bullets to Excel and show a word cloud.

    Bullets are kept when ``confirm`` accepts them. The ``top_n`` most common
    kept bullets (with their counts) are written to a workbook saved at
    ``excel_path``; all kept bullets are then segmented with jieba and
    rendered as a word cloud in a matplotlib window.

    Args:
        bullet_screen_list: Iterable of bullet-screen comment strings.
        top_n: Number of most frequent AI-related bullets written to Excel.
        excel_path: Path of the workbook to write.
        font_path: Font file passed to ``WordCloud`` (the default
            ``msyh.ttc`` is the font name the original code hard-coded).

    Returns:
        None. The workbook is always written; the word cloud is skipped when
        no bullet is AI-related.
    """
    ai_related_bullet = [bullet for bullet in bullet_screen_list if confirm(bullet)]

    # Write the frequency table; with no matches the sheet holds only the header.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "AI Related Bullet-Screen"
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in Counter(ai_related_bullet).most_common(top_n):
        sheet.append([content, count])
    wb.save(excel_path)

    # WordCloud.generate() raises ValueError on text without any word, so
    # there is nothing to draw when no bullet matched.
    if not ai_related_bullet:
        print('No AI-related bullet-screen comments found; skipping word cloud.')
        return

    # Segment the joined text with jieba; WordCloud expects space-separated words.
    cut_text = ' '.join(jieba.cut(' '.join(ai_related_bullet)))
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(cut_text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()  # display the word cloud

if __name__ == '__main__':
    # Script entry point: feed the result of get_bv(BV_NUM) to
    # get_bullet_screen, then export and plot whatever it returns.
    make_word_cloud(get_bullet_screen(get_bv(BV_NUM)))