"""Sentiment analysis of bullet-screen comments (danmaku).

A natural-language-processing model is used to classify the sentiment of
each comment. If the model is not available locally it is downloaded
automatically (about 450MB).

Inference is slow and the amount of text is large, so only a random sample
of the comments is analysed (1000 by default); reduce the sample size if
needed. Comments whose sentiment is hard to determine, or that contain
sensitive words, are skipped.

Settings used in ``main``:
    sep       separator between comments in the text file
    filePath  path of the comment text file
    schema    key under which the model returns its result
    model     name of the model file
    savePath  path where the bar chart is saved

Note: the "probability" shown on the bar chart is the average confidence of
the sentiment recognition itself, not the probability that the comments are
positive or negative.
"""

import random

from paddlenlp import Taskflow
import matplotlib.pyplot as plt


def loadText(sep, filePath):
    """Read the UTF-8 text file at ``filePath`` and split it on ``sep``.

    Returns the list of pieces produced by ``str.split``.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        text = file.read()
    return text.split(sep)


def loadModel(schema, model):
    """Build the ``information_extraction`` Taskflow for ``schema``/``model``."""
    return Taskflow('information_extraction', schema=schema, model=model)


def emoChange(emo, pro, count, probability):
    """Record one classified comment in the running statistics (in place).

    ``count`` and ``probability`` are two-element lists: index 0 is used
    when ``emo`` equals '正向', index 1 for every other label. The matching
    counter is incremented and the matching mean is updated incrementally
    with the new confidence ``pro``.
    """
    idx = 0 if emo == '正向' else 1
    count[idx] += 1
    # Incremental mean: new_mean = old_mean + (x - old_mean) / n.
    probability[idx] += (pro - probability[idx]) / count[idx]


def createBar(count, probability, savePath):
    """Draw the per-sentiment count bar chart, save it to ``savePath``, show it."""
    x_data = [f'正向(可能性:{probability[0]})', f'负向(可能性:{probability[1]})']

    # SimHei is selected so the Chinese labels can be rendered.
    plt.rcParams["font.sans-serif"] = ["SimHei"]
    plt.rcParams["axes.unicode_minus"] = False
    plt.figure(figsize=(10, 7))

    for label, amount in zip(x_data, count):
        plt.bar(label, amount, width=0.7)
        # Print the exact count just above its bar.
        plt.text(label, amount + 0.01, amount,
                 ha="center", va="bottom", fontsize=17)

    plt.title("弹幕情感方向数量统计")
    plt.xlabel("弹幕情感方向")
    plt.ylabel("数量")
    plt.savefig(fname=savePath, dpi=500)
    plt.show()


def main(sampleCount=1000):
    """Sample comments at random, classify them and plot the statistics.

    ``sampleCount`` is the number of random draws (with replacement) taken
    from the loaded comments; draws for which the model returns no entry
    under the schema key are skipped.
    """
    sep = ','
    filePath = './docs/allBarrage.txt'
    schema = '情感倾向[正向,负向]'
    model = 'uie-base'
    savePath = './docs/emoImg.png'

    t_list = loadText(sep, filePath)
    ie = loadModel(schema, model)

    count = [0, 0]
    probability = [0, 0]
    for i in range(sampleCount):
        if i % 100 == 0:
            print(f'当前正在处理第{i}条弹幕')
        # Draw from the real list so a short file cannot cause an IndexError
        # (the previous fixed range 0..20000 could exceed the list length).
        barrage = random.choice(t_list)
        # Run the (slow) model once per comment and reuse its first result.
        result = ie(barrage)[0]
        if schema not in result:
            continue
        top = result[schema][0]
        emoChange(top['text'], top['probability'], count, probability)

    createBar(count, probability, savePath)
    print(count)
    print(probability)


if __name__ == '__main__':
    main()