2026-04-05 15:37:50 -04:00

298 lines
6.6 KiB
JSON

{
"best-base_weighted_ce-ep5_vs_GPT-5.4": {
"cat_macro_f1": 0.9360575579144376,
"cat_weighted_f1": 0.9360564701876355,
"cat_macro_precision": 0.9336791798534633,
"cat_macro_recall": 0.9414080218768329,
"cat_mcc": 0.9248088496355107,
"cat_auc": 0.991343460842945,
"cat_ece": 0.0441274690628052,
"cat_confusion_matrix": [
[
220,
0,
8,
0,
1,
0,
1
],
[
0,
86,
0,
0,
1,
1,
0
],
[
1,
0,
143,
1,
5,
0,
0
],
[
0,
0,
2,
133,
0,
1,
0
],
[
6,
0,
7,
18,
165,
1,
1
],
[
0,
3,
1,
8,
2,
207,
0
],
[
0,
0,
0,
1,
6,
1,
169
]
],
"cat_f1_BoardGov": 0.962800875273523,
"cat_prec_BoardGov": 0.9691629955947136,
"cat_recall_BoardGov": 0.9565217391304348,
"cat_f1_Incident": 0.9717514124293786,
"cat_prec_Incident": 0.9662921348314607,
"cat_recall_Incident": 0.9772727272727273,
"cat_f1_Manageme": 0.9196141479099679,
"cat_prec_Manageme": 0.8881987577639752,
"cat_recall_Manageme": 0.9533333333333334,
"cat_f1_NoneOthe": 0.8956228956228957,
"cat_prec_NoneOthe": 0.8260869565217391,
"cat_recall_NoneOthe": 0.9779411764705882,
"cat_f1_RiskMana": 0.873015873015873,
"cat_prec_RiskMana": 0.9166666666666666,
"cat_recall_RiskMana": 0.8333333333333334,
"cat_f1_Strategy": 0.9583333333333334,
"cat_prec_Strategy": 0.981042654028436,
"cat_recall_Strategy": 0.9366515837104072,
"cat_f1_Third-Pa": 0.9712643678160919,
"cat_prec_Third-Pa": 0.9883040935672515,
"cat_recall_Third-Pa": 0.9548022598870056,
"cat_kripp_alpha": 0.9243601922903683,
"spec_macro_f1": 0.5970357338282843,
"spec_weighted_f1": 0.7040798408451929,
"spec_macro_precision": 0.7225196233593912,
"spec_macro_recall": 0.6139005306639329,
"spec_mcc": 0.6138700055328291,
"spec_auc": 0.9498756282617218,
"spec_ece": 0.1652249880135059,
"spec_confusion_matrix": [
[
596,
6,
11,
5
],
[
105,
46,
9,
8
],
[
14,
6,
52,
135
],
[
4,
0,
3,
200
]
],
"spec_f1_L1Generi": 0.8915482423335827,
"spec_prec_L1Generi": 0.8289290681502086,
"spec_recall_L1Generi": 0.9644012944983819,
"spec_f1_L2Domain": 0.40707964601769914,
"spec_prec_L2Domain": 0.7931034482758621,
"spec_recall_L2Domain": 0.27380952380952384,
"spec_f1_L3Firm-S": 0.36879432624113473,
"spec_prec_L3Firm-S": 0.6933333333333334,
"spec_recall_L3Firm-S": 0.25120772946859904,
"spec_f1_L4Quanti": 0.7207207207207207,
"spec_prec_L4Quanti": 0.5747126436781609,
"spec_recall_L4Quanti": 0.966183574879227,
"spec_qwk": 0.8757404773441285,
"spec_mae": 0.2975,
"spec_kripp_alpha": 0.8479072400833478,
"total_time_s": 6.695346015971154,
"num_samples": 1200,
"avg_ms_per_sample": 5.579455013309295,
"combined_macro_f1": 0.766546645871361
},
"best-base_weighted_ce-ep5_vs_Opus-4.6": {
"cat_macro_f1": 0.9280167387549427,
"cat_weighted_f1": 0.9273898648954128,
"cat_macro_precision": 0.9223465490796974,
"cat_macro_recall": 0.9382296607170699,
"cat_mcc": 0.9162751746063641,
"cat_auc": 0.992382433433919,
"cat_ece": 0.04689237485329312,
"cat_confusion_matrix": [
[
209,
0,
2,
1,
1,
0,
1
],
[
0,
78,
0,
0,
1,
0,
0
],
[
4,
0,
147,
2,
4,
0,
1
],
[
0,
0,
1,
139,
1,
0,
0
],
[
13,
1,
10,
15,
168,
1,
5
],
[
1,
10,
1,
4,
3,
209,
0
],
[
0,
0,
0,
0,
2,
1,
164
]
],
"cat_f1_BoardGov": 0.9478458049886621,
"cat_prec_BoardGov": 0.920704845814978,
"cat_recall_BoardGov": 0.9766355140186916,
"cat_f1_Incident": 0.9285714285714286,
"cat_prec_Incident": 0.8764044943820225,
"cat_recall_Incident": 0.9873417721518988,
"cat_f1_Manageme": 0.9216300940438872,
"cat_prec_Manageme": 0.9130434782608695,
"cat_recall_Manageme": 0.930379746835443,
"cat_f1_NoneOthe": 0.9205298013245033,
"cat_prec_NoneOthe": 0.8633540372670807,
"cat_recall_NoneOthe": 0.9858156028368794,
"cat_f1_RiskMana": 0.8549618320610687,
"cat_prec_RiskMana": 0.9333333333333333,
"cat_recall_RiskMana": 0.7887323943661971,
"cat_f1_Strategy": 0.9521640091116174,
"cat_prec_Strategy": 0.990521327014218,
"cat_recall_Strategy": 0.9166666666666666,
"cat_f1_Third-Pa": 0.9704142011834319,
"cat_prec_Third-Pa": 0.9590643274853801,
"cat_recall_Third-Pa": 0.9820359281437125,
"cat_kripp_alpha": 0.9154955768233572,
"spec_macro_f1": 0.5957642708821952,
"spec_weighted_f1": 0.693005282664721,
"spec_macro_precision": 0.731933400476396,
"spec_macro_recall": 0.6249872364065566,
"spec_mcc": 0.6143201053040909,
"spec_auc": 0.9470735892830423,
"spec_ece": 0.18189165468017254,
"spec_confusion_matrix": [
[
592,
4,
4,
5
],
[
92,
42,
6,
5
],
[
35,
12,
63,
150
],
[
0,
0,
2,
188
]
],
"spec_f1_L1Generi": 0.8942598187311178,
"spec_prec_L1Generi": 0.8233657858136301,
"spec_recall_L1Generi": 0.9785123966942149,
"spec_f1_L2Domain": 0.41379310344827586,
"spec_prec_L2Domain": 0.7241379310344828,
"spec_recall_L2Domain": 0.2896551724137931,
"spec_f1_L3Firm-S": 0.3761194029850746,
"spec_prec_L3Firm-S": 0.84,
"spec_recall_L3Firm-S": 0.2423076923076923,
"spec_f1_L4Quanti": 0.6988847583643123,
"spec_prec_L4Quanti": 0.5402298850574713,
"spec_recall_L4Quanti": 0.9894736842105263,
"spec_qwk": 0.872110225054491,
"spec_mae": 0.3075,
"spec_kripp_alpha": 0.8502616991488389,
"total_time_s": 6.695346015971154,
"num_samples": 1200,
"avg_ms_per_sample": 5.579455013309295,
"combined_macro_f1": 0.7618905048185689
}
}