You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
Write AI-related bullet-screen comments to an Excel workbook and render them as a word cloud.
"""
# import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import openpyxl
import jieba
import re
from bullet_screen import get_bullet_screen
from bv_maker import get_bv, BV_NUM
jieba.setLogLevel(jieba.logging.INFO) # set jieba's logger to INFO level (original note: removes the warning output)
# Chinese keywords that mark a bullet comment as AI-related; they match
# anywhere in the text.
_CJK_AI_KEYWORDS = ('人工智能', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能')

# Latin keywords, written in lower case because the text is lower-cased before
# matching. They only count when not glued to other ASCII letters, so that
# words such as "said" or "openai" are not treated as a mention of "ai".
_LATIN_AI_KEYWORDS = ('ai',)

# Compiled once at import time instead of being rebuilt on every call.
_AI_PATTERN = re.compile(
    '|'.join(re.escape(keyword) for keyword in _CJK_AI_KEYWORDS)
    + r'|(?<![a-zA-Z])(?:'
    + '|'.join(re.escape(keyword) for keyword in _LATIN_AI_KEYWORDS)
    + r')(?![a-zA-Z])'
)


def confirm(bullet):
    """
    Decide whether a bullet comment is related to AI applications.

    The comment is lower-cased and searched for any of the AI keywords.
    The ASCII-letter boundary check is applied only to the Latin keyword
    ``ai``; Chinese keywords match even when they sit directly next to Latin
    letters (e.g. ``智能app``), which the previous single-pattern version
    wrongly rejected.

    :param bullet: text of one bullet comment (str)
    :return: True if the comment contains an AI keyword, otherwise False
    """
    return _AI_PATTERN.search(bullet.lower()) is not None
def make_word_cloud(bullet_screen_list, *, top_n=8,
                    excel_path='ai_bullet_screen示例.xlsx',
                    font_path='msyh.ttc'):
    """
    Export the most frequent AI-related bullet comments to Excel and show a word cloud.

    Steps:
      1. keep only the comments for which ``confirm`` returns True;
      2. print the ``top_n`` most common comments and write them, with their
         counts, to an Excel workbook saved at ``excel_path``;
      3. segment the kept comments with jieba and display a word cloud.

    If no comment is AI-related, the workbook is still written (header row
    only) and the word cloud is skipped, because WordCloud cannot render text
    that contains no words (it raises ValueError).

    :param bullet_screen_list: iterable of bullet comment strings
    :param top_n: how many of the most frequent comments to export (default 8)
    :param excel_path: where the workbook is saved
    :param font_path: font file handed to WordCloud; it must be able to render
        Chinese glyphs
    :return: None
    """
    # Keep only the comments classified as AI-related.
    ai_related_bullet = [bullet for bullet in bullet_screen_list if confirm(bullet)]

    # Most frequent comments, as (text, count) pairs.
    top_bullets = Counter(ai_related_bullet).most_common(top_n)
    print(top_bullets)

    # Write the ranking to an Excel sheet with openpyxl.
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "AI Related Bullet-Screen"
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in top_bullets:
        sheet.append([content, count])
    workbook.save(excel_path)

    if not ai_related_bullet:
        # Nothing to draw: generating a cloud from empty text would fail.
        print('No AI-related bullet comments found; skipping the word cloud.')
        return

    # jieba splits the Chinese text into words; WordCloud expects
    # whitespace-separated tokens.
    cut_text = ' '.join(jieba.cut(' '.join(ai_related_bullet)))
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
        colormap='cool',
    ).generate(cut_text)

    # Display the rendered cloud without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
if __name__ == '__main__':
    # Script entry point: chain the three pipeline stages.
    # get_bv / get_bullet_screen come from sibling modules; presumably they
    # return video BV ids and the bullet comments of those videos - verify
    # against bv_maker and bullet_screen.
    video_ids = get_bv(BV_NUM)
    all_bullets = get_bullet_screen(video_ids)
    make_word_cloud(all_bullets)