298 lines
6.6 KiB
JSON
298 lines
6.6 KiB
JSON
{
|
|
"best-base_weighted_ce-ep5_vs_GPT-5.4": {
|
|
"cat_macro_f1": 0.9360575579144376,
|
|
"cat_weighted_f1": 0.9360564701876355,
|
|
"cat_macro_precision": 0.9336791798534633,
|
|
"cat_macro_recall": 0.9414080218768329,
|
|
"cat_mcc": 0.9248088496355107,
|
|
"cat_auc": 0.991343460842945,
|
|
"cat_ece": 0.0441274690628052,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
220,
|
|
0,
|
|
8,
|
|
0,
|
|
1,
|
|
0,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
86,
|
|
0,
|
|
0,
|
|
1,
|
|
1,
|
|
0
|
|
],
|
|
[
|
|
1,
|
|
0,
|
|
143,
|
|
1,
|
|
5,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
2,
|
|
133,
|
|
0,
|
|
1,
|
|
0
|
|
],
|
|
[
|
|
6,
|
|
0,
|
|
7,
|
|
18,
|
|
165,
|
|
1,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
3,
|
|
1,
|
|
8,
|
|
2,
|
|
207,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
1,
|
|
6,
|
|
1,
|
|
169
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.962800875273523,
|
|
"cat_prec_BoardGov": 0.9691629955947136,
|
|
"cat_recall_BoardGov": 0.9565217391304348,
|
|
"cat_f1_Incident": 0.9717514124293786,
|
|
"cat_prec_Incident": 0.9662921348314607,
|
|
"cat_recall_Incident": 0.9772727272727273,
|
|
"cat_f1_Manageme": 0.9196141479099679,
|
|
"cat_prec_Manageme": 0.8881987577639752,
|
|
"cat_recall_Manageme": 0.9533333333333334,
|
|
"cat_f1_NoneOthe": 0.8956228956228957,
|
|
"cat_prec_NoneOthe": 0.8260869565217391,
|
|
"cat_recall_NoneOthe": 0.9779411764705882,
|
|
"cat_f1_RiskMana": 0.873015873015873,
|
|
"cat_prec_RiskMana": 0.9166666666666666,
|
|
"cat_recall_RiskMana": 0.8333333333333334,
|
|
"cat_f1_Strategy": 0.9583333333333334,
|
|
"cat_prec_Strategy": 0.981042654028436,
|
|
"cat_recall_Strategy": 0.9366515837104072,
|
|
"cat_f1_Third-Pa": 0.9712643678160919,
|
|
"cat_prec_Third-Pa": 0.9883040935672515,
|
|
"cat_recall_Third-Pa": 0.9548022598870056,
|
|
"cat_kripp_alpha": 0.9243601922903683,
|
|
"spec_macro_f1": 0.5970357338282843,
|
|
"spec_weighted_f1": 0.7040798408451929,
|
|
"spec_macro_precision": 0.7225196233593912,
|
|
"spec_macro_recall": 0.6139005306639329,
|
|
"spec_mcc": 0.6138700055328291,
|
|
"spec_auc": 0.9498756282617218,
|
|
"spec_ece": 0.1652249880135059,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
596,
|
|
6,
|
|
11,
|
|
5
|
|
],
|
|
[
|
|
105,
|
|
46,
|
|
9,
|
|
8
|
|
],
|
|
[
|
|
14,
|
|
6,
|
|
52,
|
|
135
|
|
],
|
|
[
|
|
4,
|
|
0,
|
|
3,
|
|
200
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.8915482423335827,
|
|
"spec_prec_L1Generi": 0.8289290681502086,
|
|
"spec_recall_L1Generi": 0.9644012944983819,
|
|
"spec_f1_L2Domain": 0.40707964601769914,
|
|
"spec_prec_L2Domain": 0.7931034482758621,
|
|
"spec_recall_L2Domain": 0.27380952380952384,
|
|
"spec_f1_L3Firm-S": 0.36879432624113473,
|
|
"spec_prec_L3Firm-S": 0.6933333333333334,
|
|
"spec_recall_L3Firm-S": 0.25120772946859904,
|
|
"spec_f1_L4Quanti": 0.7207207207207207,
|
|
"spec_prec_L4Quanti": 0.5747126436781609,
|
|
"spec_recall_L4Quanti": 0.966183574879227,
|
|
"spec_qwk": 0.8757404773441285,
|
|
"spec_mae": 0.2975,
|
|
"spec_kripp_alpha": 0.8479072400833478,
|
|
"total_time_s": 6.695346015971154,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 5.579455013309295,
|
|
"combined_macro_f1": 0.766546645871361
|
|
},
|
|
"best-base_weighted_ce-ep5_vs_Opus-4.6": {
|
|
"cat_macro_f1": 0.9280167387549427,
|
|
"cat_weighted_f1": 0.9273898648954128,
|
|
"cat_macro_precision": 0.9223465490796974,
|
|
"cat_macro_recall": 0.9382296607170699,
|
|
"cat_mcc": 0.9162751746063641,
|
|
"cat_auc": 0.992382433433919,
|
|
"cat_ece": 0.04689237485329312,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
209,
|
|
0,
|
|
2,
|
|
1,
|
|
1,
|
|
0,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
78,
|
|
0,
|
|
0,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
4,
|
|
0,
|
|
147,
|
|
2,
|
|
4,
|
|
0,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
1,
|
|
139,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
13,
|
|
1,
|
|
10,
|
|
15,
|
|
168,
|
|
1,
|
|
5
|
|
],
|
|
[
|
|
1,
|
|
10,
|
|
1,
|
|
4,
|
|
3,
|
|
209,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
2,
|
|
1,
|
|
164
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.9478458049886621,
|
|
"cat_prec_BoardGov": 0.920704845814978,
|
|
"cat_recall_BoardGov": 0.9766355140186916,
|
|
"cat_f1_Incident": 0.9285714285714286,
|
|
"cat_prec_Incident": 0.8764044943820225,
|
|
"cat_recall_Incident": 0.9873417721518988,
|
|
"cat_f1_Manageme": 0.9216300940438872,
|
|
"cat_prec_Manageme": 0.9130434782608695,
|
|
"cat_recall_Manageme": 0.930379746835443,
|
|
"cat_f1_NoneOthe": 0.9205298013245033,
|
|
"cat_prec_NoneOthe": 0.8633540372670807,
|
|
"cat_recall_NoneOthe": 0.9858156028368794,
|
|
"cat_f1_RiskMana": 0.8549618320610687,
|
|
"cat_prec_RiskMana": 0.9333333333333333,
|
|
"cat_recall_RiskMana": 0.7887323943661971,
|
|
"cat_f1_Strategy": 0.9521640091116174,
|
|
"cat_prec_Strategy": 0.990521327014218,
|
|
"cat_recall_Strategy": 0.9166666666666666,
|
|
"cat_f1_Third-Pa": 0.9704142011834319,
|
|
"cat_prec_Third-Pa": 0.9590643274853801,
|
|
"cat_recall_Third-Pa": 0.9820359281437125,
|
|
"cat_kripp_alpha": 0.9154955768233572,
|
|
"spec_macro_f1": 0.5957642708821952,
|
|
"spec_weighted_f1": 0.693005282664721,
|
|
"spec_macro_precision": 0.731933400476396,
|
|
"spec_macro_recall": 0.6249872364065566,
|
|
"spec_mcc": 0.6143201053040909,
|
|
"spec_auc": 0.9470735892830423,
|
|
"spec_ece": 0.18189165468017254,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
592,
|
|
4,
|
|
4,
|
|
5
|
|
],
|
|
[
|
|
92,
|
|
42,
|
|
6,
|
|
5
|
|
],
|
|
[
|
|
35,
|
|
12,
|
|
63,
|
|
150
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
2,
|
|
188
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.8942598187311178,
|
|
"spec_prec_L1Generi": 0.8233657858136301,
|
|
"spec_recall_L1Generi": 0.9785123966942149,
|
|
"spec_f1_L2Domain": 0.41379310344827586,
|
|
"spec_prec_L2Domain": 0.7241379310344828,
|
|
"spec_recall_L2Domain": 0.2896551724137931,
|
|
"spec_f1_L3Firm-S": 0.3761194029850746,
|
|
"spec_prec_L3Firm-S": 0.84,
|
|
"spec_recall_L3Firm-S": 0.2423076923076923,
|
|
"spec_f1_L4Quanti": 0.6988847583643123,
|
|
"spec_prec_L4Quanti": 0.5402298850574713,
|
|
"spec_recall_L4Quanti": 0.9894736842105263,
|
|
"spec_qwk": 0.872110225054491,
|
|
"spec_mae": 0.3075,
|
|
"spec_kripp_alpha": 0.8502616991488389,
|
|
"total_time_s": 6.695346015971154,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 5.579455013309295,
|
|
"combined_macro_f1": 0.7618905048185689
|
|
}
|
|
} |