You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
1.8 KiB

"""
Filter AI-related bullet-screen comments, write the most frequent ones to an Excel file, and render a word cloud.
"""
# import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import openpyxl
import jieba
import re
from bullet_screen import get_bullet_screen
from bv_maker import get_bv, BV_NUM
# Set jieba's logger to INFO level; presumably this hides its debug-level
# start-up messages during segmentation -- TODO confirm against jieba's docs.
jieba.setLogLevel(jieba.logging.INFO)
# Keywords that mark a bullet-screen comment as AI-related.
_AI_KEYWORDS = ('人工智能', 'AI', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能', 'ai')


def _is_ascii_letter(char):
    """Return True if *char* is one of a-z or A-Z."""
    return 'a' <= char <= 'z' or 'A' <= char <= 'Z'


def _build_keyword_pattern(keywords):
    """Compile one regex that matches any of *keywords* in lower-cased text.

    A keyword edge that is an ASCII letter gets a "no ASCII letter next to it"
    lookaround, so "ai" does not match inside words such as "said" or "main".
    Edges that are not ASCII letters (the Chinese keywords) get no lookaround:
    applying it there only produced false negatives such as "gpt机器学习".
    """
    alternatives = []
    # The text is lower-cased before matching, so the keywords are normalised
    # the same way; dict.fromkeys drops the resulting duplicates ('AI'/'ai').
    for keyword in dict.fromkeys(k.lower() for k in keywords):
        if not keyword:
            continue
        part = re.escape(keyword)
        if _is_ascii_letter(keyword[0]):
            part = r'(?<![a-zA-Z])' + part
        if _is_ascii_letter(keyword[-1]):
            part = part + r'(?![a-zA-Z])'
        alternatives.append(part)
    return re.compile('|'.join(alternatives))


# Built once at import time instead of on every confirm() call.
_AI_PATTERN = _build_keyword_pattern(_AI_KEYWORDS)


def confirm(bullet):
    """Return True if *bullet* mentions one of the AI keywords.

    Matching ignores case. Latin keywords must not be embedded in a longer
    run of ASCII letters; Chinese keywords match anywhere in the text.

    Args:
        bullet: Text of one bullet-screen comment.

    Returns:
        True when a keyword is found, False otherwise (including when
        *bullet* is not a string).
    """
    if not isinstance(bullet, str):
        return False
    return _AI_PATTERN.search(bullet.lower()) is not None
def _save_top_bullets(bullets, xlsx_path, top_n):
    """Write the *top_n* most frequent entries of *bullets* to *xlsx_path*."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "AI Related Bullet-Screen"
    # Header row: "bullet content", "occurrence count".
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in Counter(bullets).most_common(top_n):
        sheet.append([content, count])
    workbook.save(xlsx_path)


def _show_word_cloud(bullets, font_path):
    """Segment *bullets* with jieba and display them as a word cloud."""
    # WordCloud splits words on whitespace, so the Chinese text is segmented
    # with jieba first and re-joined with spaces.
    cut_text = ' '.join(jieba.cut(' '.join(bullets)))
    cloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(cut_text)
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def make_word_cloud(bullet_screen_list, xlsx_path='ai_bullet_screen.xlsx',
                    font_path='msyh.ttc', top_n=8):
    """Export the most common AI-related bullets and show their word cloud.

    Bullets are kept when ``confirm`` accepts them. The ``top_n`` most
    frequent kept bullets are written, with their counts, to an Excel
    workbook; all kept bullets are then rendered as a word cloud.

    Args:
        bullet_screen_list: Iterable of bullet-screen comment strings.
        xlsx_path: Where the workbook is saved.
        font_path: Font file handed to WordCloud. The default 'msyh.ttc' is
            presumably Microsoft YaHei, which may be missing on non-Windows
            systems -- pass another CJK-capable font there.
        top_n: Number of most frequent bullets written to the workbook.

    Returns:
        None. When no bullet is AI-related the workbook contains only the
        header row and no word cloud is shown.
    """
    ai_related_bullet = [
        bullet for bullet in (bullet_screen_list or ()) if confirm(bullet)
    ]
    _save_top_bullets(ai_related_bullet, xlsx_path, top_n)
    if not ai_related_bullet:
        # WordCloud.generate raises ValueError when given no words at all,
        # so skip the plot instead of crashing after the workbook is saved.
        return
    _show_word_cloud(ai_related_bullet, font_path)
if __name__ == '__main__':
    # Pipeline: get_bv(BV_NUM) -> get_bullet_screen(...) -> make_word_cloud(...).
    # NOTE(review): the two helpers come from other modules; judging by their
    # names they yield video BV ids and the bullet-screen comments for them.
    make_word_cloud(get_bullet_screen(get_bv(BV_NUM)))