{ "GPT-5.4": { "cat_macro_f1": 0.9336475878058536, "cat_weighted_f1": 0.9342872402134198, "cat_macro_precision": 0.9319024691959354, "cat_macro_recall": 0.9376938259865566, "cat_mcc": 0.9226968780743573, "cat_auc": 0.9924054453050574, "cat_ece": 0.05363284418980283, "cat_confusion_matrix": [ [ 225, 0, 3, 0, 2, 0, 0 ], [ 0, 85, 0, 0, 2, 1, 0 ], [ 2, 0, 145, 1, 2, 0, 0 ], [ 1, 0, 3, 130, 0, 2, 0 ], [ 6, 1, 5, 19, 164, 1, 2 ], [ 0, 3, 1, 8, 2, 207, 0 ], [ 0, 0, 0, 0, 12, 0, 165 ] ], "cat_f1_BoardGov": 0.9698275862068966, "cat_prec_BoardGov": 0.9615384615384616, "cat_recall_BoardGov": 0.9782608695652174, "cat_f1_Incident": 0.96045197740113, "cat_prec_Incident": 0.9550561797752809, "cat_recall_Incident": 0.9659090909090909, "cat_f1_Manageme": 0.9446254071661238, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9666666666666667, "cat_f1_NoneOthe": 0.8843537414965986, "cat_prec_NoneOthe": 0.8227848101265823, "cat_recall_NoneOthe": 0.9558823529411765, "cat_f1_RiskMana": 0.8586387434554974, "cat_prec_RiskMana": 0.8913043478260869, "cat_recall_RiskMana": 0.8282828282828283, "cat_f1_Strategy": 0.9583333333333334, "cat_prec_Strategy": 0.981042654028436, "cat_recall_Strategy": 0.9366515837104072, "cat_f1_Third-Pa": 0.9593023255813954, "cat_prec_Third-Pa": 0.9880239520958084, "cat_recall_Third-Pa": 0.9322033898305084, "cat_kripp_alpha": 0.9223561935890119, "spec_macro_f1": 0.8918479759675974, "spec_weighted_f1": 0.9097693388297432, "spec_macro_precision": 0.8930494570032042, "spec_macro_recall": 0.8915621000757135, "spec_mcc": 0.8628946887605918, "spec_auc": 0.9807842405238503, "spec_ece": 0.07049367701013878, "spec_confusion_matrix": [ [ 577, 24, 14, 3 ], [ 29, 129, 8, 2 ], [ 9, 5, 191, 2 ], [ 2, 1, 9, 195 ] ], "spec_f1_L1Generi": 0.934412955465587, "spec_prec_L1Generi": 0.9351701782820098, "spec_recall_L1Generi": 0.9336569579288025, "spec_f1_L2Domain": 0.7889908256880734, "spec_prec_L2Domain": 0.8113207547169812, "spec_recall_L2Domain": 0.7678571428571429, "spec_f1_L3Firm-S": 0.8904428904428905, "spec_prec_L3Firm-S": 0.8603603603603603, "spec_recall_L3Firm-S": 0.9227053140096618, "spec_f1_L4Quanti": 0.9535452322738386, "spec_prec_L4Quanti": 0.9653465346534653, "spec_recall_L4Quanti": 0.9420289855072463, "spec_qwk": 0.931514217618119, "spec_mae": 0.12, "spec_kripp_alpha": 0.9169918680049234 }, "Opus-4.6": { "cat_macro_f1": 0.9242573204255528, "cat_weighted_f1": 0.9232556488517519, "cat_macro_precision": 0.9193897229484191, "cat_macro_recall": 0.9331778058838005, "cat_mcc": 0.9112549308356716, "cat_auc": 0.9941614030336741, "cat_ece": 0.06330573419729862, "cat_confusion_matrix": [ [ 212, 0, 1, 0, 1, 0, 0 ], [ 0, 78, 0, 0, 1, 0, 0 ], [ 8, 0, 145, 1, 3, 0, 1 ], [ 0, 0, 1, 138, 1, 1, 0 ], [ 13, 0, 9, 14, 169, 1, 7 ], [ 1, 11, 1, 4, 3, 208, 0 ], [ 0, 0, 0, 1, 6, 1, 159 ] ], "cat_f1_BoardGov": 0.9464285714285714, "cat_prec_BoardGov": 0.905982905982906, "cat_recall_BoardGov": 0.9906542056074766, "cat_f1_Incident": 0.9285714285714286, "cat_prec_Incident": 0.8764044943820225, "cat_recall_Incident": 0.9873417721518988, "cat_f1_Manageme": 0.9206349206349206, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9177215189873418, "cat_f1_NoneOthe": 0.9230769230769231, "cat_prec_NoneOthe": 0.8734177215189873, "cat_recall_NoneOthe": 0.9787234042553191, "cat_f1_RiskMana": 0.8513853904282116, "cat_prec_RiskMana": 0.9184782608695652, "cat_recall_RiskMana": 0.7934272300469484, "cat_f1_Strategy": 0.9476082004555809, "cat_prec_Strategy": 0.985781990521327, "cat_recall_Strategy": 0.9122807017543859, "cat_f1_Third-Pa": 0.9520958083832335, "cat_prec_Third-Pa": 0.9520958083832335, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.9105393643352402, "spec_macro_f1": 0.8827245859621925, "spec_weighted_f1": 0.8997656600606208, "spec_macro_precision": 0.8833309642003535, "spec_macro_recall": 0.8861518760895928, "spec_mcc": 0.8488976906438819, "spec_auc": 0.9740582923879771, "spec_ece": 0.08290670409798626, "spec_confusion_matrix": [ [ 564, 31, 9, 1 ], [ 22, 118, 3, 2 ], [ 31, 10, 209, 10 ], [ 0, 0, 1, 189 ] ], "spec_f1_L1Generi": 0.9230769230769231, "spec_prec_L1Generi": 0.9141004862236629, "spec_recall_L1Generi": 0.9322314049586777, "spec_f1_L2Domain": 0.7763157894736842, "spec_prec_L2Domain": 0.7421383647798742, "spec_recall_L2Domain": 0.8137931034482758, "spec_f1_L3Firm-S": 0.8672199170124482, "spec_prec_L3Firm-S": 0.9414414414414415, "spec_recall_L3Firm-S": 0.8038461538461539, "spec_f1_L4Quanti": 0.9642857142857143, "spec_prec_L4Quanti": 0.9356435643564357, "spec_recall_L4Quanti": 0.9947368421052631, "spec_qwk": 0.92235918049198, "spec_mae": 0.13666666666666666, "spec_kripp_alpha": 0.9061330450504643 }, "_runtime": { "encoder_mb": 789.563648, "ms_per_sample": 9.671733896636093, "throughput_per_s": 103.39407707937539, "peak_vram_mb": 1774.27392578125, "build_s": 0.4831273259478621 } }