弹幕情感分析

main
xxxiix 11 months ago
parent 2943c776c8
commit da84c99f77

@ -0,0 +1,86 @@
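'''
Barrage (bullet-comment) sentiment analysis.

Uses a natural-language-processing model to analyse the sentiment of barrage
comments. If the model is not available locally, it is downloaded
automatically (about 450 MB).
The model is slow; when there is a lot of text, consider reducing the number
of comments. The program currently selects only 1000 comments.
Comments whose sentiment is hard to analyse, or that contain sensitive words,
are skipped.

sep      -- separator between comments in the barrage text
filePath -- path of the barrage text file
schema   -- key used to fetch the model's output
model    -- model file
savePath -- path where the bar chart is saved

P.S. The "可能性" (probability) shown on the bar chart is the average
confidence with which the sentiment was recognised, not the probability that
a comment is positive or negative.
'''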
from paddlenlp import Taskflow
import matplotlib.pyplot as plt
# Load the barrage text
def loadText(sep, filePath):
    """Read the barrage file and split it into individual comments.

    Args:
        sep: Separator string that delimits comments inside the file.
        filePath: Path of the UTF-8 encoded barrage text file.

    Returns:
        list[str]: The comments in file order, each stripped of surrounding
        whitespace. Blank entries are dropped, so an empty file yields ``[]``.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        text = file.read()
    # A trailing separator, a final newline or doubled separators would
    # otherwise produce empty / whitespace-only "comments" that carry no
    # sentiment but would still be handed to the caller.
    stripped = (piece.strip() for piece in text.split(sep))
    return [piece for piece in stripped if piece]
# Load the natural-language-processing model
def loadModel(schema, model):
    """Create the information-extraction pipeline used for the analysis.

    Args:
        schema: Extraction schema forwarded to ``Taskflow`` as ``schema``.
        model: Model name forwarded to ``Taskflow`` as ``model``.

    Returns:
        The ``Taskflow`` object built for the ``'information_extraction'``
        task.
    """
    return Taskflow('information_extraction', schema=schema, model=model)
# Update the per-sentiment counters and the running average probability
def emoChange(emo, pro, count, probability):
    """Fold one classified comment into the running statistics, in place.

    Args:
        emo: Sentiment label; ``'正向'`` selects slot 0, any other value
            selects slot 1.
        pro: Probability reported for this label.
        count: Two-element list of per-slot comment counts (mutated).
        probability: Two-element list of per-slot mean probabilities
            (mutated).

    Returns:
        None. ``count`` and ``probability`` are updated in place.
    """
    slot = 0 if emo == '正向' else 1
    count[slot] += 1
    # Incremental mean: move the old average towards the new sample by
    # 1/n, where n already includes the sample just counted.
    previous = probability[slot]
    probability[slot] = previous + (pro - previous)/count[slot]
# Draw the bar chart
def createBar(count, probability, savePath):
    """Plot the number of positive and negative comments as a bar chart.

    The figure is written to ``savePath`` and then displayed.

    Args:
        count: Two-element sequence: positive count, negative count.
        probability: Two-element sequence with the mean probability of each
            sentiment; the values are embedded in the bar labels.
        savePath: Destination path of the saved image.
    """
    labels = [f'正向(可能性:{probability[0]})', f'负向(可能性:{probability[1]})']

    # SimHei is set so the Chinese labels can be rendered.
    plt.rcParams["font.sans-serif"] = ["SimHei"]
    plt.rcParams["axes.unicode_minus"] = False
    plt.figure(figsize=(10, 7))

    # One plt.bar call per category.
    for idx, label in enumerate(labels):
        plt.bar(label, count[idx], width=0.7)
    plt.title("弹幕情感方向数量统计")

    # Print each count just above the top of its bar.
    for idx, label in enumerate(labels):
        plt.text(label, count[idx]+0.01, count[idx], ha="center", va="bottom", fontsize=17)

    plt.xlabel("弹幕情感方向")
    plt.ylabel("数量")
    plt.savefig(fname=savePath, dpi=500)
    plt.show()
def main():
    """Run the barrage sentiment analysis end to end.

    Loads the comments from ``./docs/allBarrage.txt``, classifies at most the
    first 1000 of them, accumulates the per-sentiment counts and mean
    probabilities, saves/shows the bar chart, and prints both statistics.
    Comments for which the model returns no entry under the schema key are
    skipped.
    """
    sep = ','
    filePath = './docs/allBarrage.txt'
    schema = '情感倾向[正向,负向]'
    model = 'uie-base'
    savePath = './docs/emoImg.png'
    max_barrages = 1000

    t_list = loadText(sep, filePath)
    ie = loadModel(schema, model)
    count = [0, 0]
    probability = [0, 0]

    # Slicing (rather than indexing 0..999) keeps the loop safe when the file
    # holds fewer than max_barrages comments.
    for i, barrage in enumerate(t_list[:max_barrages]):
        if i % 100 == 0:
            print(f'当前正在处理第{i}条弹幕')
        # Inference is the slow step, so run the model once per comment and
        # reuse the result for the membership check and both field reads.
        result = ie(barrage)[0]
        if schema not in result:
            continue
        top = result[schema][0]
        emoChange(top['text'], top['probability'], count, probability)

    createBar(count, probability, savePath)
    print(count)
    print(probability)


# Run the analysis only when the file is executed as a script.
if __name__ == '__main__':
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 219 KiB

@ -0,0 +1,11 @@
jieba==0.42.1
matplotlib==3.9.2
numpy==2.1.1
openpyxl==3.1.5
paddlepaddle==3.0.0b1
paddlenlp==2.6.1
pandas==2.2.2
Pillow==10.4.0
Requests==2.32.3
scikit_learn==1.5.2
wordcloud==1.9.3
Loading…
Cancel
Save