{ "GPT-5.4": { "cat_macro_f1": 0.16293893512410998, "cat_weighted_f1": 0.1746727986514593, "cat_macro_precision": 0.6289222195093943, "cat_macro_recall": 0.23220413662370398, "cat_mcc": 0.22345796853389935, "cat_auc": 0.8960306312891495, "cat_ece": 0.2080524676044782, "cat_confusion_matrix": [ [ 8, 0, 0, 0, 221, 0, 1 ], [ 0, 0, 0, 1, 82, 0, 5 ], [ 0, 0, 4, 0, 145, 0, 1 ], [ 0, 0, 2, 3, 128, 0, 3 ], [ 0, 0, 0, 0, 195, 0, 3 ], [ 0, 0, 0, 0, 208, 2, 11 ], [ 0, 0, 0, 0, 80, 0, 97 ] ], "cat_f1_BoardGov": 0.06722689075630252, "cat_prec_BoardGov": 1.0, "cat_recall_BoardGov": 0.034782608695652174, "cat_f1_Incident": 0.0, "cat_prec_Incident": 0.0, "cat_recall_Incident": 0.0, "cat_f1_Manageme": 0.05128205128205128, "cat_prec_Manageme": 0.6666666666666666, "cat_recall_Manageme": 0.02666666666666667, "cat_f1_NoneOthe": 0.04285714285714286, "cat_prec_NoneOthe": 0.75, "cat_recall_NoneOthe": 0.022058823529411766, "cat_f1_RiskMana": 0.31026252983293556, "cat_prec_RiskMana": 0.18413597733711048, "cat_recall_RiskMana": 0.9848484848484849, "cat_f1_Strategy": 0.017937219730941704, "cat_prec_Strategy": 1.0, "cat_recall_Strategy": 0.00904977375565611, "cat_f1_Third-Pa": 0.6510067114093959, "cat_prec_Third-Pa": 0.8016528925619835, "cat_recall_Third-Pa": 0.5480225988700564, "cat_kripp_alpha": -0.08693512028952255, "spec_macro_f1": 0.20854117827130608, "spec_weighted_f1": 0.2571301750438355, "spec_macro_precision": 0.3741612607031285, "spec_macro_recall": 0.33018440069147115, "spec_mcc": 0.1895317453505129, "spec_auc": 0.8110497500610155, "spec_ece": 0.44289420386155437, "spec_confusion_matrix": [ [ 136, 473, 9, 0 ], [ 4, 163, 1, 0 ], [ 1, 179, 27, 0 ], [ 2, 171, 34, 0 ] ], "spec_f1_L1Generi": 0.35742444152431013, "spec_prec_L1Generi": 0.951048951048951, "spec_recall_L1Generi": 0.22006472491909385, "spec_f1_L2Domain": 0.2824956672443674, "spec_prec_L2Domain": 0.16531440162271804, "spec_recall_L2Domain": 0.9702380952380952, "spec_f1_L3Firm-S": 0.19424460431654678, "spec_prec_L3Firm-S": 0.38028169014084506, "spec_recall_L3Firm-S": 0.13043478260869565, "spec_f1_L4Quanti": 0.0, "spec_prec_L4Quanti": 0.0, "spec_recall_L4Quanti": 0.0, "spec_qwk": 0.2326064604575444, "spec_mae": 0.8825, "spec_kripp_alpha": 0.26499611744119067 }, "Opus-4.6": { "cat_macro_f1": 0.16861118726256397, "cat_weighted_f1": 0.1792365613004711, "cat_macro_precision": 0.6306758954840335, "cat_macro_recall": 0.2357303291121537, "cat_mcc": 0.2251562222131823, "cat_auc": 0.8995073249291591, "cat_ece": 0.19888580093781152, "cat_confusion_matrix": [ [ 8, 0, 0, 0, 205, 0, 1 ], [ 0, 0, 0, 1, 73, 0, 5 ], [ 0, 0, 4, 0, 154, 0, 0 ], [ 0, 0, 2, 3, 133, 0, 3 ], [ 0, 0, 0, 0, 208, 0, 5 ], [ 0, 0, 0, 0, 216, 2, 10 ], [ 0, 0, 0, 0, 70, 0, 97 ] ], "cat_f1_BoardGov": 0.07207207207207207, "cat_prec_BoardGov": 1.0, "cat_recall_BoardGov": 0.037383177570093455, "cat_f1_Incident": 0.0, "cat_prec_Incident": 0.0, "cat_recall_Incident": 0.0, "cat_f1_Manageme": 0.04878048780487805, "cat_prec_Manageme": 0.6666666666666666, "cat_recall_Manageme": 0.02531645569620253, "cat_f1_NoneOthe": 0.041379310344827586, "cat_prec_NoneOthe": 0.75, "cat_recall_NoneOthe": 0.02127659574468085, "cat_f1_RiskMana": 0.3270440251572327, "cat_prec_RiskMana": 0.1964117091595845, "cat_recall_RiskMana": 0.9765258215962441, "cat_f1_Strategy": 0.017391304347826087, "cat_prec_Strategy": 1.0, "cat_recall_Strategy": 0.008771929824561403, "cat_f1_Third-Pa": 0.6736111111111112, "cat_prec_Third-Pa": 0.8016528925619835, "cat_recall_Third-Pa": 0.5808383233532934, "cat_kripp_alpha": -0.07941064783948448, "spec_macro_f1": 0.19783939283519508, "spec_weighted_f1": 0.24886714543281097, "spec_macro_precision": 0.37592821714182745, "spec_macro_recall": 0.3291807330600434, "spec_mcc": 0.18219176358380398, "spec_auc": 0.790090253498083, "spec_ece": 0.45814307530721027, "spec_confusion_matrix": [ [ 132, 466, 7, 0 ], [ 1, 142, 2, 0 ], [ 8, 221, 31, 0 ], [ 2, 157, 31, 0 ] ], "spec_f1_L1Generi": 0.35294117647058826, "spec_prec_L1Generi": 0.9230769230769231, "spec_recall_L1Generi": 0.21818181818181817, "spec_f1_L2Domain": 0.251105216622458, "spec_prec_L2Domain": 0.1440162271805274, "spec_recall_L2Domain": 0.9793103448275862, "spec_f1_L3Firm-S": 0.18731117824773413, "spec_prec_L3Firm-S": 0.43661971830985913, "spec_recall_L3Firm-S": 0.11923076923076924, "spec_f1_L4Quanti": 0.0, "spec_prec_L4Quanti": 0.0, "spec_recall_L4Quanti": 0.0, "spec_qwk": 0.22580295138888895, "spec_mae": 0.8925, "spec_kripp_alpha": 0.2579634594689497 }, "_runtime": { "encoder_mb": 274.843904, "ms_per_sample": 5.865302347471394, "throughput_per_s": 170.49419463109393, "peak_vram_mb": 1287.34326171875, "build_s": 0.4887635139748454 } }