You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.2 KiB

"""
说明写入execl生成词云图
"""
# import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import openpyxl
import jieba
import re
from bullet_screen import get_bullet_screen
from bv_maker import get_bv, BV_NUM
jieba.setLogLevel(jieba.logging.INFO) #去除warning
# Chinese keywords are matched as plain substrings. Latin keywords must not be
# touched by an ASCII letter on either side, so that "ai" inside words such as
# "said" or "rain" is not counted as a hit.
_AI_CJK_KEYWORDS = ('人工智能', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能')
# The text is lower-cased before matching, so a single lower-case entry also
# covers "AI", "Ai", etc.
_AI_LATIN_KEYWORDS = ('ai',)
# Compiled once at import time instead of being rebuilt on every call.
_AI_PATTERN = re.compile(
    '|'.join(re.escape(keyword) for keyword in _AI_CJK_KEYWORDS)
    + r'|(?<![a-zA-Z])(?:'
    + '|'.join(re.escape(keyword) for keyword in _AI_LATIN_KEYWORDS)
    + r')(?![a-zA-Z])'
)


def confirm(bullet):
    """
    Decide whether a bullet-screen comment is related to AI applications.

    The comment is lower-cased and searched for any of the AI keywords.
    Latin keywords ("ai") only count when they are not embedded in a longer
    run of ASCII letters; Chinese keywords count wherever they appear, even
    directly next to Latin text (e.g. "chatgpt机器学习").

    :param bullet: the comment text (str)
    :return: True if an AI keyword is found, otherwise False
    """
    return _AI_PATTERN.search(bullet.lower()) is not None
def make_word_cloud(bullet_screen_list, *, top_n=8,
                    excel_path='ai_bullet_screen示例.xlsx',
                    font_path='msyh.ttc'):
    """
    Write the most frequent AI-related bullets to Excel and show a word cloud.

    Bullets are filtered with ``confirm``; the ``top_n`` most common ones are
    printed and saved to ``excel_path`` (a header row is always written).
    All AI-related bullets are then segmented with jieba and rendered as a
    word cloud in a matplotlib window.

    :param bullet_screen_list: iterable of bullet-screen comment strings
    :param top_n: how many of the most frequent bullets to report (default 8)
    :param excel_path: where the .xlsx workbook is saved
    :param font_path: font file handed to WordCloud; the words are Chinese, so
        the font presumably needs CJK glyphs (the default 'msyh.ttc' must be
        resolvable on the machine running this)
    :return: None
    """
    # Keep only the bullets that mention an AI keyword.
    ai_related_bullet = [bullet for bullet in bullet_screen_list if confirm(bullet)]

    top_bullets = Counter(ai_related_bullet).most_common(top_n)
    print(top_bullets)  # show the most frequent bullets on the console

    # Save the ranking with openpyxl: one header row, then (text, count) rows.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "AI Related Bullet-Screen"
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in top_bullets:
        sheet.append([content, count])
    wb.save(excel_path)

    if not ai_related_bullet:
        # WordCloud.generate() raises ValueError when there are no words to
        # plot, so stop here instead of crashing after the workbook is saved.
        print('No AI-related bullet-screen comments found; skipping word cloud.')
        return

    # jieba inserts word boundaries; WordCloud then splits on whitespace.
    text = ' '.join(ai_related_bullet)
    cut_text = ' '.join(jieba.cut(text))
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
        colormap='cool',
    ).generate(cut_text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()  # display the word cloud
if __name__ == '__main__':
    # Script entry point: the result of get_bv(BV_NUM) is passed to
    # get_bullet_screen, and the bullets it returns go to make_word_cloud.
    videos = get_bv(BV_NUM)
    make_word_cloud(get_bullet_screen(videos))