{ "GPT-5.4": { "cat_macro_f1": 0.9360988760303737, "cat_weighted_f1": 0.9367630863906107, "cat_macro_precision": 0.934342558672944, "cat_macro_recall": 0.9404157843351134, "cat_mcc": 0.9256911778959798, "cat_auc": 0.9918112947607864, "cat_ece": 0.052939765204985965, "cat_confusion_matrix": [ [ 226, 0, 2, 0, 2, 0, 0 ], [ 0, 85, 0, 0, 2, 1, 0 ], [ 2, 0, 145, 1, 2, 0, 0 ], [ 0, 0, 3, 132, 0, 1, 0 ], [ 6, 1, 5, 19, 164, 1, 2 ], [ 0, 3, 1, 8, 2, 207, 0 ], [ 0, 0, 0, 0, 12, 0, 165 ] ], "cat_f1_BoardGov": 0.9741379310344828, "cat_prec_BoardGov": 0.9658119658119658, "cat_recall_BoardGov": 0.9826086956521739, "cat_f1_Incident": 0.96045197740113, "cat_prec_Incident": 0.9550561797752809, "cat_recall_Incident": 0.9659090909090909, "cat_f1_Manageme": 0.9477124183006536, "cat_prec_Manageme": 0.9294871794871795, "cat_recall_Manageme": 0.9666666666666667, "cat_f1_NoneOthe": 0.8918918918918919, "cat_prec_NoneOthe": 0.825, "cat_recall_NoneOthe": 0.9705882352941176, "cat_f1_RiskMana": 0.8586387434554974, "cat_prec_RiskMana": 0.8913043478260869, "cat_recall_RiskMana": 0.8282828282828283, "cat_f1_Strategy": 0.9605568445475638, "cat_prec_Strategy": 0.9857142857142858, "cat_recall_Strategy": 0.9366515837104072, "cat_f1_Third-Pa": 0.9593023255813954, "cat_prec_Third-Pa": 0.9880239520958084, "cat_recall_Third-Pa": 0.9322033898305084, "cat_kripp_alpha": 0.9253092213149172, "spec_macro_f1": 0.8986323186392307, "spec_weighted_f1": 0.9144644120807768, "spec_macro_precision": 0.9034925881673722, "spec_macro_recall": 0.8950728490354916, "spec_mcc": 0.870090391628814, "spec_auc": 0.98134918835569, "spec_ece": 0.06740866973996164, "spec_confusion_matrix": [ [ 582, 19, 14, 3 ], [ 29, 130, 7, 2 ], [ 12, 3, 190, 2 ], [ 2, 1, 8, 196 ] ], "spec_f1_L1Generi": 0.9364440868865648, "spec_prec_L1Generi": 0.9312, "spec_recall_L1Generi": 0.941747572815534, "spec_f1_L2Domain": 0.8099688473520249, "spec_prec_L2Domain": 0.8496732026143791, "spec_recall_L2Domain": 0.7738095238095238, "spec_f1_L3Firm-S": 0.892018779342723, "spec_prec_L3Firm-S": 0.867579908675799, "spec_recall_L3Firm-S": 0.9178743961352657, "spec_f1_L4Quanti": 0.9560975609756097, "spec_prec_L4Quanti": 0.9655172413793104, "spec_recall_L4Quanti": 0.9468599033816425, "spec_qwk": 0.9307948020550015, "spec_mae": 0.1175, "spec_kripp_alpha": 0.9166492249745117 }, "Opus-4.6": { "cat_macro_f1": 0.9235105849558979, "cat_weighted_f1": 0.9224780370334836, "cat_macro_precision": 0.9187130112710481, "cat_macro_recall": 0.9326192612354074, "cat_mcc": 0.9103198007176273, "cat_auc": 0.9937246318315877, "cat_ece": 0.06465620135267579, "cat_confusion_matrix": [ [ 211, 0, 1, 1, 1, 0, 0 ], [ 0, 78, 0, 0, 1, 0, 0 ], [ 9, 0, 144, 1, 3, 0, 1 ], [ 0, 0, 1, 139, 1, 0, 0 ], [ 13, 0, 9, 14, 169, 1, 7 ], [ 1, 11, 1, 4, 3, 208, 0 ], [ 0, 0, 0, 1, 6, 1, 159 ] ], "cat_f1_BoardGov": 0.9419642857142857, "cat_prec_BoardGov": 0.9017094017094017, "cat_recall_BoardGov": 0.985981308411215, "cat_f1_Incident": 0.9285714285714286, "cat_prec_Incident": 0.8764044943820225, "cat_recall_Incident": 0.9873417721518988, "cat_f1_Manageme": 0.9171974522292994, "cat_prec_Manageme": 0.9230769230769231, "cat_recall_Manageme": 0.9113924050632911, "cat_f1_NoneOthe": 0.9235880398671097, "cat_prec_NoneOthe": 0.86875, "cat_recall_NoneOthe": 0.9858156028368794, "cat_f1_RiskMana": 0.8513853904282116, "cat_prec_RiskMana": 0.9184782608695652, "cat_recall_RiskMana": 0.7934272300469484, "cat_f1_Strategy": 0.9497716894977168, "cat_prec_Strategy": 0.9904761904761905, "cat_recall_Strategy": 0.9122807017543859, "cat_f1_Third-Pa": 0.9520958083832335, "cat_prec_Third-Pa": 0.9520958083832335, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.9095619506866199, "spec_macro_f1": 0.8826923642825633, "spec_weighted_f1": 0.8991699562480843, "spec_macro_precision": 0.8862949086294886, "spec_macro_recall": 0.8831960153359262, "spec_mcc": 0.8485449936701916, "spec_auc": 0.9725823165743999, "spec_ece": 0.083350846717755, "spec_confusion_matrix": [ [ 568, 27, 9, 1 ], [ 23, 117, 3, 2 ], [ 34, 9, 206, 11 ], [ 0, 0, 1, 189 ] ], "spec_f1_L1Generi": 0.9235772357723577, "spec_prec_L1Generi": 0.9088, "spec_recall_L1Generi": 0.9388429752066115, "spec_f1_L2Domain": 0.785234899328859, "spec_prec_L2Domain": 0.7647058823529411, "spec_recall_L2Domain": 0.8068965517241379, "spec_f1_L3Firm-S": 0.860125260960334, "spec_prec_L3Firm-S": 0.9406392694063926, "spec_recall_L3Firm-S": 0.7923076923076923, "spec_f1_L4Quanti": 0.9618320610687023, "spec_prec_L4Quanti": 0.9310344827586207, "spec_recall_L4Quanti": 0.9947368421052631, "spec_qwk": 0.9198415117342273, "spec_mae": 0.13916666666666666, "spec_kripp_alpha": 0.9038906079654127 }, "_runtime": { "encoder_mb": 789.563648, "ms_per_sample": 7.762363941583317, "throughput_per_s": 128.82673468103667, "peak_vram_mb": 2135.203125, "build_s": 1.1878160500200465 } }