You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
1.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
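Write the most frequent AI-related bullet-screen comments to an Excel workbook and render a word cloud from the AI-related comments.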
"""
# import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import openpyxl
import jieba
import re
from bullet_screen import get_bullet_screen
from bv_maker import get_bv, BV_NUM
jieba.setLogLevel(jieba.logging.INFO)
# Keywords that mark a bullet-screen comment as AI-related.
_AI_KEYWORDS = ('人工智能', 'AI', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能', 'ai')


def _compile_keyword_pattern(keywords):
    """Compile one regex matching any of *keywords* in lower-cased text.

    A keyword edge that is an ASCII letter is guarded by a lookaround so the
    keyword is not found inside a longer Latin word (``ai`` must not match in
    ``said`` or ``main``). Edges that are not ASCII letters (the Chinese
    keywords) get no guard, so they still match when written directly next to
    Latin text such as ``python机器学习``.
    """
    alternatives = []
    # The text is lower-cased before matching, so 'AI' and 'ai' collapse into
    # a single alternative; dict.fromkeys de-duplicates while keeping order.
    for keyword in dict.fromkeys(k.lower() for k in keywords):
        pattern = re.escape(keyword)
        if re.match(r'[a-z]', keyword):
            pattern = r'(?<![a-zA-Z])' + pattern
        if re.search(r'[a-z]\Z', keyword):
            pattern = pattern + r'(?![a-zA-Z])'
        alternatives.append(pattern)
    return re.compile('|'.join(alternatives))


# Built once at import time instead of on every call to confirm().
_AI_PATTERN = _compile_keyword_pattern(_AI_KEYWORDS)


def confirm(bullet):
    """Return True if *bullet* mentions an AI-related keyword.

    Matching is case-insensitive: the text is lower-cased before searching.

    Args:
        bullet: Text of a single bullet-screen comment (a ``str``).

    Returns:
        ``True`` when any keyword from ``_AI_KEYWORDS`` occurs in the text,
        ``False`` otherwise (including for the empty string).
    """
    return _AI_PATTERN.search(bullet.lower()) is not None
def make_word_cloud(bullet_screen_list, *, top_n=8,
                    xlsx_path='ai_bullet_screen.xlsx', font_path='msyh.ttc'):
    """Export the most common AI-related bullets to Excel and show a word cloud.

    The comments are filtered with ``confirm``. The ``top_n`` most frequent
    matching comments are written with their counts to ``xlsx_path``. All
    matching comments are then segmented with jieba and displayed as a word
    cloud in a matplotlib window.

    Args:
        bullet_screen_list: Iterable of bullet-screen comment strings.
        top_n: Number of most frequent comments written to the workbook.
        xlsx_path: Destination path of the Excel workbook.
        font_path: Font file passed to ``WordCloud``. The default 'msyh.ttc'
            is presumably Microsoft YaHei as shipped with Windows -- TODO
            confirm, and pass another CJK-capable font on other systems.

    Returns:
        None. The workbook is always saved; the word cloud is skipped when no
        comment is AI-related.
    """
    ai_related_bullet = [bullet for bullet in bullet_screen_list if confirm(bullet)]

    # Spreadsheet: one header row followed by (comment, occurrences) rows.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "AI Related Bullet-Screen"
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in Counter(ai_related_bullet).most_common(top_n):
        sheet.append([content, count])
    wb.save(xlsx_path)

    # WordCloud.generate raises ValueError when it is given no words, so stop
    # here instead of crashing after the workbook has been written.
    if not ai_related_bullet:
        print('No AI-related bullet-screen comments found; skipping the word cloud.')
        return

    # jieba segments the Chinese text; WordCloud expects space-separated words.
    text = ' '.join(ai_related_bullet)
    cut_text = ' '.join(jieba.cut(text))
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(cut_text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()  # display the word cloud
if __name__ == '__main__':
    # Script entry point: get_bv(BV_NUM) supplies the video list (presumably
    # BV ids -- confirm in bv_maker), get_bullet_screen fetches their
    # comments, and make_word_cloud writes the workbook and shows the cloud.
    make_word_cloud(get_bullet_screen(get_bv(BV_NUM)))