You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

87 lines
2.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

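'''
Danmaku (bullet-comment) sentiment analysis.
Uses a natural language processing model to analyze the sentiment of danmaku; if the required model is not available locally, it is downloaded automatically (450MB).
The model is slow and the amount of text is large, so consider reducing the number of danmaku as appropriate; the program currently processes only 1000 danmaku.
If the sentiment of a danmaku is hard to analyze, or it contains sensitive words, that danmaku is skipped.
sep is the separator between danmaku texts
filePath is the path of the danmaku text file
schema is the key used to fetch data from the model result
model is the model file
savePath is the path where the bar chart is saved
ps. The "possibility" shown in the bar chart is the average probability that the recognized sentiment is correct, not the probability that the danmaku sentiment is positive or negative.
'''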
from paddlenlp import Taskflow
import matplotlib.pyplot as plt
# Load the danmaku text
def loadText(sep, filePath):
    """Read the UTF-8 text file at ``filePath`` and split its content on ``sep``.

    Args:
        sep: Separator string placed between individual danmaku entries.
        filePath: Path of the text file to read.

    Returns:
        The list of pieces produced by ``str.split(sep)`` on the whole file
        content; empty pieces are kept as-is.
    """
    with open(filePath, encoding='utf-8') as handle:
        return handle.read().split(sep)
# Load the natural language processing model
def loadModel(schema, model):
    """Build the PaddleNLP ``information_extraction`` Taskflow.

    Args:
        schema: Extraction schema forwarded to ``Taskflow`` as ``schema``.
        model: Model identifier forwarded to ``Taskflow`` as ``model``.

    Returns:
        The constructed ``Taskflow`` object.
    """
    return Taskflow('information_extraction', schema=schema, model=model)
# Update the per-direction counts and running average probabilities
def emoChange(emo, pro, count, probability):
    """Record one classified danmaku in the running statistics (in place).

    Args:
        emo: Sentiment label; '正向' goes to slot 0, any other value to slot 1.
        pro: Probability reported for this label.
        count: Two-element list of occurrence counts, mutated in place.
        probability: Two-element list of running mean probabilities,
            mutated in place.

    Returns:
        None.
    """
    slot = 0 if emo == '正向' else 1
    count[slot] += 1
    # Incremental mean: shift the old mean towards the new sample by 1/n.
    previous = probability[slot]
    probability[slot] = previous + (pro - previous)/count[slot]
# Draw the bar chart
def createBar(count, probability, savePath):
    """Plot the two sentiment counts as a bar chart, save it, then show it.

    Args:
        count: Sequence whose items 0 and 1 are the bar heights.
        probability: Sequence whose items 0 and 1 are embedded in the
            x-axis labels.
        savePath: Path the figure is written to (at 500 dpi).
    """
    labels = [f'正向(可能性:{probability[0]})', f'负向(可能性:{probability[1]})']
    heights = count
    plt.rcParams["font.sans-serif"] = ["SimHei"]
    plt.rcParams["axes.unicode_minus"] = False
    plt.figure(figsize=(10, 7))
    # One bar() call per category, so each bar takes the next cycle colour.
    for idx, label in enumerate(labels):
        plt.bar(label, heights[idx], width=0.7)
    plt.title("弹幕情感方向数量统计")
    # Write each count just above the top of its bar.
    for idx, label in enumerate(labels):
        plt.text(label, heights[idx]+0.01, count[idx], ha="center", va="bottom", fontsize=17)
    plt.xlabel("弹幕情感方向")
    plt.ylabel("数量")
    # Save before show(), as in the original call order.
    plt.savefig(fname=savePath, dpi=500)
    plt.show()
def main(limit=1000):
    """Run the danmaku sentiment analysis pipeline end to end.

    Loads the danmaku text, loads the extraction model, classifies up to
    ``limit`` entries, draws the bar chart and prints the final statistics.

    Args:
        limit: Maximum number of danmaku entries to analyze. Defaults to
            1000; ``None`` analyzes every entry in the file.
    """
    sep = ','
    filePath = './docs/allBarrage.txt'
    schema = '情感倾向[正向,负向]'
    model = 'uie-base'
    savePath = './docs/emoImg.png'
    t_list = loadText(sep, filePath)
    ie = loadModel(schema, model)
    count = [0, 0]
    probability = [0, 0]
    # Slice rather than index over a fixed range, so a file with fewer than
    # `limit` entries is processed fully instead of raising IndexError.
    for i, text in enumerate(t_list[:limit]):
        if i % 100 == 0:
            print(f'当前正在处理第{i}条弹幕')
        # Run the (slow) model once per entry and reuse its result.
        result = ie(text)[0]
        if schema not in result:
            # Nothing was extracted for this entry; skip it.
            continue
        top = result[schema][0]
        emoChange(top['text'], top['probability'], count, probability)
    createBar(count, probability, savePath)
    print(count)
    print(probability)


if __name__ == '__main__':
    main()