2026-04-07 00:51:48 -04:00

298 lines
6.6 KiB
JSON

{
"iter1-dapt_vs_GPT-5.4": {
"cat_macro_f1": 0.9350000205815902,
"cat_weighted_f1": 0.936034565494772,
"cat_macro_precision": 0.9344660111343602,
"cat_macro_recall": 0.9378555188267356,
"cat_mcc": 0.9246263785540332,
"cat_auc": 0.9915953686916092,
"cat_ece": 0.04942640244960788,
"cat_confusion_matrix": [
[
224,
0,
4,
0,
2,
0,
0
],
[
0,
83,
0,
0,
2,
2,
1
],
[
2,
0,
145,
1,
2,
0,
0
],
[
0,
0,
2,
132,
1,
1,
0
],
[
6,
1,
5,
18,
166,
1,
1
],
[
0,
2,
1,
8,
1,
209,
0
],
[
0,
0,
0,
0,
13,
0,
164
]
],
"cat_f1_BoardGov": 0.9696969696969697,
"cat_prec_BoardGov": 0.9655172413793104,
"cat_recall_BoardGov": 0.9739130434782609,
"cat_f1_Incident": 0.9540229885057471,
"cat_prec_Incident": 0.9651162790697675,
"cat_recall_Incident": 0.9431818181818182,
"cat_f1_Manageme": 0.9446254071661238,
"cat_prec_Manageme": 0.9235668789808917,
"cat_recall_Manageme": 0.9666666666666667,
"cat_f1_NoneOthe": 0.8949152542372881,
"cat_prec_NoneOthe": 0.8301886792452831,
"cat_recall_NoneOthe": 0.9705882352941176,
"cat_f1_RiskMana": 0.8623376623376623,
"cat_prec_RiskMana": 0.8877005347593583,
"cat_recall_RiskMana": 0.8383838383838383,
"cat_f1_Strategy": 0.9631336405529954,
"cat_prec_Strategy": 0.9812206572769953,
"cat_recall_Strategy": 0.9457013574660633,
"cat_f1_Third-Pa": 0.956268221574344,
"cat_prec_Third-Pa": 0.9879518072289156,
"cat_recall_Third-Pa": 0.9265536723163842,
"cat_kripp_alpha": 0.9243058890635424,
"spec_macro_f1": 0.8959443847575952,
"spec_weighted_f1": 0.914085249793483,
"spec_macro_precision": 0.9055333144570721,
"spec_macro_recall": 0.889132193611932,
"spec_mcc": 0.8698798188273218,
"spec_auc": 0.9806421467148638,
"spec_ece": 0.0693218584855397,
"spec_confusion_matrix": [
[
588,
14,
13,
3
],
[
32,
126,
8,
2
],
[
11,
4,
191,
1
],
[
2,
2,
10,
193
]
],
"spec_f1_L1Generi": 0.9400479616306955,
"spec_prec_L1Generi": 0.9289099526066351,
"spec_recall_L1Generi": 0.9514563106796117,
"spec_f1_L2Domain": 0.802547770700637,
"spec_prec_L2Domain": 0.863013698630137,
"spec_recall_L2Domain": 0.75,
"spec_f1_L3Firm-S": 0.8904428904428905,
"spec_prec_L3Firm-S": 0.8603603603603603,
"spec_recall_L3Firm-S": 0.9227053140096618,
"spec_f1_L4Quanti": 0.9507389162561576,
"spec_prec_L4Quanti": 0.9698492462311558,
"spec_recall_L4Quanti": 0.9323671497584541,
"spec_qwk": 0.9315994086072762,
"spec_mae": 0.11666666666666667,
"spec_kripp_alpha": 0.9194074359344485,
"total_time_s": 6.855555058107711,
"num_samples": 1200,
"avg_ms_per_sample": 5.712962548423093,
"combined_macro_f1": 0.9154722026695927
},
"iter1-dapt_vs_Opus-4.6": {
"cat_macro_f1": 0.9277442873196512,
"cat_weighted_f1": 0.9268438855804646,
"cat_macro_precision": 0.9237899595225246,
"cat_macro_recall": 0.9349393170438051,
"cat_mcc": 0.9150420281652446,
"cat_auc": 0.9934333602136249,
"cat_ece": 0.057411353190739985,
"cat_confusion_matrix": [
[
210,
0,
2,
1,
1,
0,
0
],
[
0,
77,
0,
0,
1,
0,
1
],
[
8,
0,
145,
1,
3,
0,
1
],
[
0,
0,
0,
139,
2,
0,
0
],
[
13,
0,
9,
13,
172,
1,
5
],
[
1,
9,
1,
4,
2,
211,
0
],
[
0,
0,
0,
1,
6,
1,
159
]
],
"cat_f1_BoardGov": 0.9417040358744395,
"cat_prec_BoardGov": 0.9051724137931034,
"cat_recall_BoardGov": 0.9813084112149533,
"cat_f1_Incident": 0.9333333333333333,
"cat_prec_Incident": 0.8953488372093024,
"cat_recall_Incident": 0.9746835443037974,
"cat_f1_Manageme": 0.9206349206349206,
"cat_prec_Manageme": 0.9235668789808917,
"cat_recall_Manageme": 0.9177215189873418,
"cat_f1_NoneOthe": 0.9266666666666666,
"cat_prec_NoneOthe": 0.8742138364779874,
"cat_recall_NoneOthe": 0.9858156028368794,
"cat_f1_RiskMana": 0.86,
"cat_prec_RiskMana": 0.9197860962566845,
"cat_recall_RiskMana": 0.8075117370892019,
"cat_f1_Strategy": 0.9569160997732427,
"cat_prec_Strategy": 0.9906103286384976,
"cat_recall_Strategy": 0.9254385964912281,
"cat_f1_Third-Pa": 0.954954954954955,
"cat_prec_Third-Pa": 0.9578313253012049,
"cat_recall_Third-Pa": 0.9520958083832335,
"cat_kripp_alpha": 0.9144489824694872,
"spec_macro_f1": 0.8823881241075249,
"spec_weighted_f1": 0.8997013825586678,
"spec_macro_precision": 0.8895415282112857,
"spec_macro_recall": 0.8784196767594721,
"spec_mcc": 0.84923108221758,
"spec_auc": 0.9732413764660657,
"spec_ece": 0.08008741805950799,
"spec_confusion_matrix": [
[
573,
22,
9,
1
],
[
26,
114,
3,
2
],
[
34,
10,
207,
9
],
[
0,
0,
3,
187
]
],
"spec_f1_L1Generi": 0.925686591276252,
"spec_prec_L1Generi": 0.9052132701421801,
"spec_recall_L1Generi": 0.947107438016529,
"spec_f1_L2Domain": 0.7835051546391752,
"spec_prec_L2Domain": 0.7808219178082192,
"spec_recall_L2Domain": 0.7862068965517242,
"spec_f1_L3Firm-S": 0.8589211618257261,
"spec_prec_L3Firm-S": 0.9324324324324325,
"spec_recall_L3Firm-S": 0.7961538461538461,
"spec_f1_L4Quanti": 0.961439588688946,
"spec_prec_L4Quanti": 0.9396984924623115,
"spec_recall_L4Quanti": 0.9842105263157894,
"spec_qwk": 0.9200429286057613,
"spec_mae": 0.13833333333333334,
"spec_kripp_alpha": 0.9047987190793844,
"total_time_s": 6.855555058107711,
"num_samples": 1200,
"avg_ms_per_sample": 5.712962548423093,
"combined_macro_f1": 0.9050662057135881
}
}