You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
matching_dependency/draw/draw_confidence_histogram.py

57 lines
2.9 KiB

import os
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker
from pyecharts.globals import ThemeType
# Upper/lower bounds of the half-open confidence buckets (low, high]; an
# exact confidence of 0 is counted separately in its own leading bucket.
CONFIDENCE_EDGES = (0, 0.2, 0.4, 0.6, 0.8, 1.0)

# X-axis labels, one per bucket, in the same order as the proportions.
BUCKET_LABELS = ('conf=0', '0<conf≤0.2', '0.2<conf≤0.4', '0.4<conf≤0.6', '0.6<conf≤0.8', '0.8<conf≤1')


def confidence_proportions(preds):
    """Return the percentage of positive predictions in each confidence bucket.

    Only rows whose ``predicted`` column equals 1 are considered. The result
    has six entries: the share with ``confidence == 0`` followed by the shares
    in (0, 0.2], (0.2, 0.4], (0.4, 0.6], (0.6, 0.8] and (0.8, 1.0], each
    expressed as a percentage rounded to three decimals.

    Args:
        preds: DataFrame containing at least the columns ``predicted`` and
            ``confidence``; both must be convertible to float.

    Returns:
        A list of six floats, or ``None`` when there are no positive
        predictions (the proportions would be undefined).
    """
    positives = preds[['predicted', 'confidence']].astype(float)
    positives = positives[positives['predicted'] == 1.0]
    total = len(positives)
    if total == 0:
        # Guard against the division by zero the percentages would hit.
        return None

    confidence = positives['confidence']
    counts = [int((confidence == 0).sum())]
    counts.extend(
        int(((confidence > low) & (confidence <= high)).sum())
        for low, high in zip(CONFIDENCE_EDGES, CONFIDENCE_EDGES[1:])
    )
    return [round(count * 100 / total, ndigits=3) for count in counts]


def render_confidence_histogram(series_name, proportions, output_path):
    """Render the bucket percentages as a pyecharts bar chart.

    Args:
        series_name: Legend name of the single bar series.
        proportions: Six percentages in ``BUCKET_LABELS`` order.
        output_path: Destination of the generated HTML file.

    Returns:
        Whatever ``Bar.render`` returns for ``output_path``.
    """
    return (
        Bar(init_opts=opts.InitOpts(theme=ThemeType.WALDEN))
        .add_xaxis(list(BUCKET_LABELS))
        .add_yaxis(series_name, proportions, category_gap=2)
        .set_global_opts(
            yaxis_opts=opts.AxisOpts(
                name="Proportion",
                type_="value",
                min_=0,
                max_=100,
                position="left",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts()
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value}%"),
            ),
            title_opts=opts.TitleOpts(title="Confidence Histogram"),
            xaxis_opts=opts.AxisOpts(name="Intervals"),
        )
        .render(output_path)
    )


if __name__ == '__main__':
    outcome_dir = r'E:\Data\Research\Outcome'
    inter_list = ['0', '0.5', '0.7', '0.9', '1']
    # Directory-name prefix of one configuration; the "inter" value is
    # appended to it. The leading path separator is supplied by os.path.join.
    config_prefix = 'Magellan+Smac+roberta-large-nli-stsb-mean-tokens+inter-'

    for dataset in os.listdir(outcome_dir):
        for inter in inter_list:
            path = os.path.join(outcome_dir, dataset, config_prefix + inter)
            if not os.path.isdir(path):
                # Stray files or datasets lacking this configuration.
                print(f'Skipping {path}: not a directory')
                continue

            prediction_files = [
                name for name in os.listdir(path)
                if name.startswith('predictions')
            ]
            if not prediction_files:
                print(f'Skipping {path}: no predictions file found')
                continue

            # When several files match, the last one listed determines the
            # chart, as every render targets the same output file.
            preds = pd.read_csv(
                os.path.join(path, prediction_files[-1]),
                encoding='ISO-8859-1',
            )
            proportions = confidence_proportions(preds)
            if proportions is None:
                print(f'Skipping {path}: no positive predictions')
                continue

            render_confidence_histogram(
                dataset,
                proportions,
                os.path.join(path, 'confidence_histogram.html'),
            )