{ "iter1-dapt_vs_GPT-5.4": { "cat_macro_f1": 0.9350000205815902, "cat_weighted_f1": 0.936034565494772, "cat_macro_precision": 0.9344660111343602, "cat_macro_recall": 0.9378555188267356, "cat_mcc": 0.9246263785540332, "cat_auc": 0.9915953686916092, "cat_ece": 0.04942640244960788, "cat_confusion_matrix": [ [ 224, 0, 4, 0, 2, 0, 0 ], [ 0, 83, 0, 0, 2, 2, 1 ], [ 2, 0, 145, 1, 2, 0, 0 ], [ 0, 0, 2, 132, 1, 1, 0 ], [ 6, 1, 5, 18, 166, 1, 1 ], [ 0, 2, 1, 8, 1, 209, 0 ], [ 0, 0, 0, 0, 13, 0, 164 ] ], "cat_f1_BoardGov": 0.9696969696969697, "cat_prec_BoardGov": 0.9655172413793104, "cat_recall_BoardGov": 0.9739130434782609, "cat_f1_Incident": 0.9540229885057471, "cat_prec_Incident": 0.9651162790697675, "cat_recall_Incident": 0.9431818181818182, "cat_f1_Manageme": 0.9446254071661238, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9666666666666667, "cat_f1_NoneOthe": 0.8949152542372881, "cat_prec_NoneOthe": 0.8301886792452831, "cat_recall_NoneOthe": 0.9705882352941176, "cat_f1_RiskMana": 0.8623376623376623, "cat_prec_RiskMana": 0.8877005347593583, "cat_recall_RiskMana": 0.8383838383838383, "cat_f1_Strategy": 0.9631336405529954, "cat_prec_Strategy": 0.9812206572769953, "cat_recall_Strategy": 0.9457013574660633, "cat_f1_Third-Pa": 0.956268221574344, "cat_prec_Third-Pa": 0.9879518072289156, "cat_recall_Third-Pa": 0.9265536723163842, "cat_kripp_alpha": 0.9243058890635424, "spec_macro_f1": 0.8959443847575952, "spec_weighted_f1": 0.914085249793483, "spec_macro_precision": 0.9055333144570721, "spec_macro_recall": 0.889132193611932, "spec_mcc": 0.8698798188273218, "spec_auc": 0.9806421467148638, "spec_ece": 0.0693218584855397, "spec_confusion_matrix": [ [ 588, 14, 13, 3 ], [ 32, 126, 8, 2 ], [ 11, 4, 191, 1 ], [ 2, 2, 10, 193 ] ], "spec_f1_L1Generi": 0.9400479616306955, "spec_prec_L1Generi": 0.9289099526066351, "spec_recall_L1Generi": 0.9514563106796117, "spec_f1_L2Domain": 0.802547770700637, "spec_prec_L2Domain": 0.863013698630137, "spec_recall_L2Domain": 0.75, "spec_f1_L3Firm-S": 0.8904428904428905, "spec_prec_L3Firm-S": 0.8603603603603603, "spec_recall_L3Firm-S": 0.9227053140096618, "spec_f1_L4Quanti": 0.9507389162561576, "spec_prec_L4Quanti": 0.9698492462311558, "spec_recall_L4Quanti": 0.9323671497584541, "spec_qwk": 0.9315994086072762, "spec_mae": 0.11666666666666667, "spec_kripp_alpha": 0.9194074359344485, "total_time_s": 6.855555058107711, "num_samples": 1200, "avg_ms_per_sample": 5.712962548423093, "combined_macro_f1": 0.9154722026695927 }, "iter1-dapt_vs_Opus-4.6": { "cat_macro_f1": 0.9277442873196512, "cat_weighted_f1": 0.9268438855804646, "cat_macro_precision": 0.9237899595225246, "cat_macro_recall": 0.9349393170438051, "cat_mcc": 0.9150420281652446, "cat_auc": 0.9934333602136249, "cat_ece": 0.057411353190739985, "cat_confusion_matrix": [ [ 210, 0, 2, 1, 1, 0, 0 ], [ 0, 77, 0, 0, 1, 0, 1 ], [ 8, 0, 145, 1, 3, 0, 1 ], [ 0, 0, 0, 139, 2, 0, 0 ], [ 13, 0, 9, 13, 172, 1, 5 ], [ 1, 9, 1, 4, 2, 211, 0 ], [ 0, 0, 0, 1, 6, 1, 159 ] ], "cat_f1_BoardGov": 0.9417040358744395, "cat_prec_BoardGov": 0.9051724137931034, "cat_recall_BoardGov": 0.9813084112149533, "cat_f1_Incident": 0.9333333333333333, "cat_prec_Incident": 0.8953488372093024, "cat_recall_Incident": 0.9746835443037974, "cat_f1_Manageme": 0.9206349206349206, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9177215189873418, "cat_f1_NoneOthe": 0.9266666666666666, "cat_prec_NoneOthe": 0.8742138364779874, "cat_recall_NoneOthe": 0.9858156028368794, "cat_f1_RiskMana": 0.86, "cat_prec_RiskMana": 0.9197860962566845, "cat_recall_RiskMana": 0.8075117370892019, "cat_f1_Strategy": 0.9569160997732427, "cat_prec_Strategy": 0.9906103286384976, "cat_recall_Strategy": 0.9254385964912281, "cat_f1_Third-Pa": 0.954954954954955, "cat_prec_Third-Pa": 0.9578313253012049, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.9144489824694872, "spec_macro_f1": 0.8823881241075249, "spec_weighted_f1": 0.8997013825586678, "spec_macro_precision": 0.8895415282112857, "spec_macro_recall": 0.8784196767594721, "spec_mcc": 0.84923108221758, "spec_auc": 0.9732413764660657, "spec_ece": 0.08008741805950799, "spec_confusion_matrix": [ [ 573, 22, 9, 1 ], [ 26, 114, 3, 2 ], [ 34, 10, 207, 9 ], [ 0, 0, 3, 187 ] ], "spec_f1_L1Generi": 0.925686591276252, "spec_prec_L1Generi": 0.9052132701421801, "spec_recall_L1Generi": 0.947107438016529, "spec_f1_L2Domain": 0.7835051546391752, "spec_prec_L2Domain": 0.7808219178082192, "spec_recall_L2Domain": 0.7862068965517242, "spec_f1_L3Firm-S": 0.8589211618257261, "spec_prec_L3Firm-S": 0.9324324324324325, "spec_recall_L3Firm-S": 0.7961538461538461, "spec_f1_L4Quanti": 0.961439588688946, "spec_prec_L4Quanti": 0.9396984924623115, "spec_recall_L4Quanti": 0.9842105263157894, "spec_qwk": 0.9200429286057613, "spec_mae": 0.13833333333333334, "spec_kripp_alpha": 0.9047987190793844, "total_time_s": 6.855555058107711, "num_samples": 1200, "avg_ms_per_sample": 5.712962548423093, "combined_macro_f1": 0.9050662057135881 } }