{ "GPT-5.4": { "cat_macro_f1": 0.9344870894825886, "cat_weighted_f1": 0.9351173265780133, "cat_macro_precision": 0.9326512314038842, "cat_macro_recall": 0.9387442461546238, "cat_mcc": 0.9237006314618685, "cat_auc": 0.992309699625497, "cat_ece": 0.05415941931307314, "cat_confusion_matrix": [ [ 225, 0, 3, 0, 2, 0, 0 ], [ 0, 85, 0, 0, 2, 1, 0 ], [ 2, 0, 145, 1, 2, 0, 0 ], [ 0, 0, 3, 131, 0, 2, 0 ], [ 6, 1, 5, 19, 164, 1, 2 ], [ 0, 3, 1, 8, 2, 207, 0 ], [ 0, 0, 0, 0, 12, 0, 165 ] ], "cat_f1_BoardGov": 0.9719222462203023, "cat_prec_BoardGov": 0.9656652360515021, "cat_recall_BoardGov": 0.9782608695652174, "cat_f1_Incident": 0.96045197740113, "cat_prec_Incident": 0.9550561797752809, "cat_recall_Incident": 0.9659090909090909, "cat_f1_Manageme": 0.9446254071661238, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9666666666666667, "cat_f1_NoneOthe": 0.888135593220339, "cat_prec_NoneOthe": 0.8238993710691824, "cat_recall_NoneOthe": 0.9632352941176471, "cat_f1_RiskMana": 0.8586387434554974, "cat_prec_RiskMana": 0.8913043478260869, "cat_recall_RiskMana": 0.8282828282828283, "cat_f1_Strategy": 0.9583333333333334, "cat_prec_Strategy": 0.981042654028436, "cat_recall_Strategy": 0.9366515837104072, "cat_f1_Third-Pa": 0.9593023255813954, "cat_prec_Third-Pa": 0.9880239520958084, "cat_recall_Third-Pa": 0.9322033898305084, "cat_kripp_alpha": 0.9233443339647499, "spec_macro_f1": 0.8941203230194683, "spec_weighted_f1": 0.9115075208518084, "spec_macro_precision": 0.8957148694260108, "spec_macro_recall": 0.892931893103379, "spec_mcc": 0.8651929532300995, "spec_auc": 0.981624069084201, "spec_ece": 0.06980206420024232, "spec_confusion_matrix": [ [ 579, 24, 12, 3 ], [ 29, 131, 6, 2 ], [ 10, 6, 189, 2 ], [ 2, 1, 9, 195 ] ], "spec_f1_L1Generi": 0.9353796445880452, "spec_prec_L1Generi": 0.9338709677419355, "spec_recall_L1Generi": 0.9368932038834952, "spec_f1_L2Domain": 0.793939393939394, "spec_prec_L2Domain": 0.808641975308642, "spec_recall_L2Domain": 0.7797619047619048, "spec_f1_L3Firm-S": 0.8936170212765957, "spec_prec_L3Firm-S": 0.875, "spec_recall_L3Firm-S": 0.9130434782608695, "spec_f1_L4Quanti": 0.9535452322738386, "spec_prec_L4Quanti": 0.9653465346534653, "spec_recall_L4Quanti": 0.9420289855072463, "spec_qwk": 0.9329693660903852, "spec_mae": 0.1175, "spec_kripp_alpha": 0.9181842655510584 }, "Opus-4.6": { "cat_macro_f1": 0.9234810481200378, "cat_weighted_f1": 0.9224737817442137, "cat_macro_precision": 0.9185473372257941, "cat_macro_recall": 0.9325102491414775, "cat_mcc": 0.9102750101817324, "cat_auc": 0.9940184741579791, "cat_ece": 0.0641141641388337, "cat_confusion_matrix": [ [ 211, 0, 1, 1, 1, 0, 0 ], [ 0, 78, 0, 0, 1, 0, 0 ], [ 8, 0, 145, 1, 3, 0, 1 ], [ 0, 0, 1, 138, 1, 1, 0 ], [ 13, 0, 9, 14, 169, 1, 7 ], [ 1, 11, 1, 4, 3, 208, 0 ], [ 0, 0, 0, 1, 6, 1, 159 ] ], "cat_f1_BoardGov": 0.9440715883668904, "cat_prec_BoardGov": 0.9055793991416309, "cat_recall_BoardGov": 0.985981308411215, "cat_f1_Incident": 0.9285714285714286, "cat_prec_Incident": 0.8764044943820225, "cat_recall_Incident": 0.9873417721518988, "cat_f1_Manageme": 0.9206349206349206, "cat_prec_Manageme": 0.9235668789808917, "cat_recall_Manageme": 0.9177215189873418, "cat_f1_NoneOthe": 0.92, "cat_prec_NoneOthe": 0.8679245283018868, "cat_recall_NoneOthe": 0.9787234042553191, "cat_f1_RiskMana": 0.8513853904282116, "cat_prec_RiskMana": 0.9184782608695652, "cat_recall_RiskMana": 0.7934272300469484, "cat_f1_Strategy": 0.9476082004555809, "cat_prec_Strategy": 0.985781990521327, "cat_recall_Strategy": 0.9122807017543859, "cat_f1_Third-Pa": 0.9520958083832335, "cat_prec_Third-Pa": 0.9520958083832335, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.9095617653952504, "spec_macro_f1": 0.8814731397444973, "spec_weighted_f1": 0.8981338362706646, "spec_macro_precision": 0.8833981471623865, "spec_macro_recall": 0.8849913986360116, "spec_mcc": 0.8465512998506631, "spec_auc": 0.9729999946345258, "spec_ece": 0.08370273689428968, "spec_confusion_matrix": [ [ 564, 33, 7, 1 ], [ 22, 119, 2, 2 ], [ 34, 10, 206, 10 ], [ 0, 0, 1, 189 ] ], "spec_f1_L1Generi": 0.9208163265306123, "spec_prec_L1Generi": 0.9096774193548387, "spec_recall_L1Generi": 0.9322314049586777, "spec_f1_L2Domain": 0.7752442996742671, "spec_prec_L2Domain": 0.7345679012345679, "spec_recall_L2Domain": 0.8206896551724138, "spec_f1_L3Firm-S": 0.865546218487395, "spec_prec_L3Firm-S": 0.9537037037037037, "spec_recall_L3Firm-S": 0.7923076923076923, "spec_f1_L4Quanti": 0.9642857142857143, "spec_prec_L4Quanti": 0.9356435643564357, "spec_recall_L4Quanti": 0.9947368421052631, "spec_qwk": 0.9207708779443254, "spec_mae": 0.13916666666666666, "spec_kripp_alpha": 0.9033268512180281 }, "_runtime": { "encoder_mb": 789.563648, "ms_per_sample": 6.078403938445263, "throughput_per_s": 164.5168715549004, "peak_vram_mb": 1416.36376953125, "build_s": 0.5027359619853087 } }