|
|
|
@ -0,0 +1,55 @@
|
|
|
|
|
"""
|
|
|
|
|
Description: write the AI-related bullet comments to an Excel file and generate a word cloud.
|
|
|
|
|
"""
|
|
|
|
|
# import pandas as pd
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from collections import Counter
|
|
|
|
|
import openpyxl
|
|
|
|
|
import jieba
|
|
|
|
|
import re
|
|
|
|
|
from bullet_screen import get_bullet_screen
|
|
|
|
|
from bv_maker import get_bv, BV_NUM
|
|
|
|
|
jieba.setLogLevel(jieba.logging.INFO)
|
|
|
|
|
|
|
|
|
|
# Keywords that mark a bullet comment as AI-related.
_AI_KEYWORDS = ('人工智能', 'AI', '机器学习', '深度学习', '神经网络', '自动驾驶', '自然语言处理', '智能', 'ai')

# Compiled once at import time instead of on every call.
# Keywords are lower-cased (and de-duplicated, order preserved) because
# confirm() lower-cases its input; an upper-case alternative could otherwise
# never match. The lookarounds keep an ASCII keyword such as "ai" from matching
# inside a longer Latin word (e.g. "said", "openai").
_AI_PATTERN = re.compile(
    r'(?<![a-zA-Z])(?:'
    + '|'.join(
        re.escape(word)
        for word in dict.fromkeys(keyword.lower() for keyword in _AI_KEYWORDS)
    )
    + r')(?![a-zA-Z])'
)


def confirm(bullet):
    """Return True if *bullet* mentions one of the AI-related keywords.

    Matching is case-insensitive. A keyword only counts when it is not
    directly preceded or followed by an ASCII letter, so "ai" matches in
    "ai绘画" but not in "said".

    Args:
        bullet: Text of a single bullet comment.

    Returns:
        bool: True when a keyword is found, False otherwise. Values that are
        not strings (for example None) are treated as non-matching instead of
        raising AttributeError.
    """
    if not isinstance(bullet, str):
        return False
    return _AI_PATTERN.search(bullet.lower()) is not None
|
|
|
|
|
|
|
|
|
|
def make_word_cloud(bullet_screen_list, *, top_n=8,
                    excel_path='ai_bullet_screen.xlsx', font_path='msyh.ttc'):
    """Write the most frequent AI-related bullets to Excel and show a word cloud.

    The bullets are filtered with ``confirm``. The ``top_n`` most common
    matching bullets are saved to ``excel_path`` together with their counts,
    then all matching bullets are segmented with jieba and displayed as a
    word cloud in a matplotlib window.

    Args:
        bullet_screen_list: Iterable of bullet comment strings. ``None`` is
            treated as an empty list.
        top_n: Number of most common bullets written to the workbook.
        excel_path: Path the workbook is saved to.
        font_path: Font file passed to ``WordCloud``; the default is the
            value the script originally hard-coded.

    Returns:
        None.
    """
    ai_related_bullet = [
        bullet for bullet in (bullet_screen_list or ()) if confirm(bullet)
    ]

    # Excel report: one header row followed by (bullet text, occurrences) rows.
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = "AI Related Danmakus"
    sheet.append(["弹幕内容", "出现次数"])
    for content, count in Counter(ai_related_bullet).most_common(top_n):
        sheet.append([content, count])
    wb.save(excel_path)

    # WordCloud.generate() raises ValueError when it is given no words, so
    # stop here (the workbook has already been written) if nothing matched.
    if not ai_related_bullet:
        print('No AI-related bullet comments found; skipping the word cloud.')
        return

    # Segment the text with jieba; WordCloud expects space-separated tokens.
    cut_text = ' '.join(jieba.cut(' '.join(ai_related_bullet)))
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(cut_text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()  # Display the word cloud.
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the result of get_bv(BV_NUM) is fed to
    # get_bullet_screen, and its result is handed to make_word_cloud.
    make_word_cloud(get_bullet_screen(get_bv(BV_NUM)))
|