|
|
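'''
Barrage (bullet-comment) sentiment analysis.

Uses a natural language processing model to analyse the sentiment of barrages.
If the model is not available locally it is downloaded automatically (about 450 MB).

Inference is slow and the corpus is large, so consider reducing the number of
barrages that are analysed; the program currently selects only 1000 of them.

A barrage is skipped when its sentiment is hard to determine or when it
contains sensitive words.

sep      -- the separator between barrages in the text file
filePath -- the path of the barrage text file
schema   -- the key used to fetch the model's output
model    -- the model file
savePath -- the path the bar chart is saved to

Note: the "probability" shown in the bar chart is the average confidence that
the recognised sentiment is correct, not the probability that a barrage is
positive or negative.
'''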
|
|
|
|
|
|
from paddlenlp import Taskflow
|
|
|
import matplotlib.pyplot as plt
|
|
|
import random
|
|
|
|
|
|
# 加载弹幕字符文本
|
|
|
def loadText(sep, filePath):
    """Read the barrage file and split its content into a list.

    Args:
        sep: Separator string placed between barrages in the file.
        filePath: Path of the UTF-8 encoded barrage text file.

    Returns:
        The list of substrings obtained by splitting the whole file
        content on ``sep``.
    """
    with open(filePath, 'r', encoding='utf-8') as handle:
        return handle.read().split(sep)
|
|
|
|
|
|
# 加载自然语言处理模型
|
|
|
def loadModel(schema, model):
    """Create the information-extraction pipeline used for sentiment analysis.

    Args:
        schema: Extraction schema forwarded to ``Taskflow``.
        model: Model identifier forwarded to ``Taskflow``.

    Returns:
        The ``Taskflow('information_extraction', ...)`` object.
    """
    return Taskflow('information_extraction', schema=schema, model=model)
|
|
|
|
|
|
# 计算情感方向的数量以及平均的可能性
|
|
|
def emoChange(emo, pro, count, probability):
    """Record one classified barrage in the running statistics (in place).

    Args:
        emo: Sentiment label; '正向' is counted as positive, any other
            value is counted as negative.
        pro: Probability reported for that label.
        count: Two-element list ``[positive, negative]`` of counters.
        probability: Two-element list ``[positive, negative]`` holding the
            mean probability of each direction so far.

    Returns:
        None. ``count`` and ``probability`` are mutated.
    """
    # Slot 0 tracks the positive direction, slot 1 everything else.
    slot = 0 if emo == '正向' else 1
    count[slot] += 1
    # Incremental mean: new_mean = old_mean + (x - old_mean) / n
    probability[slot] += (pro - probability[slot]) / count[slot]
|
|
|
|
|
|
# 绘制柱形图
|
|
|
def createBar(count, probability, savePath):
    """Draw, save and display the bar chart of sentiment counts.

    Args:
        count: ``[positive, negative]`` barrage counts; used as bar heights
            and as the numbers written above the bars.
        probability: ``[positive, negative]`` mean probabilities; embedded
            in the x-axis labels.
        savePath: File path the figure is written to (dpi 500).
    """
    labels = [
        f'正向(可能性:{probability[0]})',
        f'负向(可能性:{probability[1]})',
    ]
    heights = count

    # Font settings applied before drawing; SimHei is selected for the
    # Chinese title and labels.
    plt.rcParams["font.sans-serif"] = ["SimHei"]
    plt.rcParams["axes.unicode_minus"] = False

    plt.figure(figsize=(10, 7))
    for idx, label in enumerate(labels):
        plt.bar(label, heights[idx], width=0.7)
    plt.title("弹幕情感方向数量统计")

    # Write each count just above its bar.
    for idx, label in enumerate(labels):
        plt.text(label, heights[idx]+0.01, count[idx],
                 ha="center", va="bottom", fontsize=17)

    plt.xlabel("弹幕情感方向")
    plt.ylabel("数量")

    # Save before show().
    plt.savefig(fname=savePath, dpi=500)
    plt.show()
|
|
|
|
|
|
|
|
|
def main():
    """Sample barrages, classify their sentiment and plot the totals.

    Loads the barrage file and the extraction model, classifies up to
    ``sample_size`` randomly chosen barrages, accumulates the per-direction
    counts and mean probabilities, draws the bar chart and prints both
    lists. A barrage whose model output does not contain the schema key is
    skipped.
    """
    sep = ','
    filePath = './docs/allBarrage.txt'
    schema = '情感倾向[正向,负向]'
    model = 'uie-base'
    savePath = './docs/emoImg.png'
    sample_size = 1000

    t_list = loadText(sep, filePath)
    ie = loadModel(schema, model)

    count = [0, 0]
    probability = [0, 0]

    # Blank entries (e.g. produced by a trailing separator) carry no text
    # to classify, so they are excluded before sampling.
    candidates = [barrage for barrage in t_list if barrage.strip()]

    # Sample from the barrages that actually exist instead of indexing with
    # a hard-coded upper bound, which raised IndexError on smaller files.
    # random.sample draws without replacement, so no barrage is counted twice.
    sampled = random.sample(candidates, min(sample_size, len(candidates)))

    for i, barrage in enumerate(sampled):
        if i % 100 == 0:
            print(f'当前正在处理第{i}条弹幕')

        # Run the (slow) model once per barrage and reuse the result.
        result = ie(barrage)[0]
        if schema not in result:
            continue

        top = result[schema][0]
        emoChange(top['text'], top['probability'], count, probability)

    createBar(count, probability, savePath)

    print(count)
    print(probability)
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|