{ "GPT-5.4": { "cat_macro_f1": 0.3536909012886116, "cat_weighted_f1": 0.4058815979606338, "cat_macro_precision": 0.6317997184487815, "cat_macro_recall": 0.38979766446605063, "cat_mcc": 0.42471542150657926, "cat_auc": 0.9205800077405307, "cat_ece": 0.09734637491405013, "cat_confusion_matrix": [ [ 143, 0, 0, 0, 85, 0, 2 ], [ 0, 0, 0, 4, 73, 0, 11 ], [ 3, 0, 20, 1, 124, 0, 2 ], [ 1, 0, 1, 5, 122, 0, 7 ], [ 0, 0, 0, 0, 185, 0, 13 ], [ 0, 0, 0, 0, 180, 28, 13 ], [ 0, 0, 0, 0, 22, 0, 155 ] ], "cat_f1_BoardGov": 0.7586206896551724, "cat_prec_BoardGov": 0.9727891156462585, "cat_recall_BoardGov": 0.6217391304347826, "cat_f1_Incident": 0.0, "cat_prec_Incident": 0.0, "cat_recall_Incident": 0.0, "cat_f1_Manageme": 0.23391812865497075, "cat_prec_Manageme": 0.9523809523809523, "cat_recall_Manageme": 0.13333333333333333, "cat_f1_NoneOthe": 0.0684931506849315, "cat_prec_NoneOthe": 0.5, "cat_recall_NoneOthe": 0.03676470588235294, "cat_f1_RiskMana": 0.3741152679474216, "cat_prec_RiskMana": 0.23388116308470291, "cat_recall_RiskMana": 0.9343434343434344, "cat_f1_Strategy": 0.2248995983935743, "cat_prec_Strategy": 1.0, "cat_recall_Strategy": 0.12669683257918551, "cat_f1_Third-Pa": 0.8157894736842105, "cat_prec_Third-Pa": 0.7635467980295566, "cat_recall_Third-Pa": 0.8757062146892656, "cat_kripp_alpha": 0.27180867501339423, "spec_macro_f1": 0.22049451330952025, "spec_weighted_f1": 0.26278390857815354, "spec_macro_precision": 0.4075440073341987, "spec_macro_recall": 0.34148466970860386, "spec_mcc": 0.20939315966102864, "spec_auc": 0.8490039116946011, "spec_ece": 0.43363295723994577, "spec_confusion_matrix": [ [ 132, 483, 3, 0 ], [ 2, 166, 0, 0 ], [ 2, 171, 34, 0 ], [ 0, 175, 32, 0 ] ], "spec_f1_L1Generi": 0.35013262599469497, "spec_prec_L1Generi": 0.9705882352941176, "spec_recall_L1Generi": 0.21359223300970873, "spec_f1_L2Domain": 0.28546861564918313, "spec_prec_L2Domain": 0.16683417085427135, "spec_recall_L2Domain": 0.9880952380952381, "spec_f1_L3Firm-S": 0.2463768115942029, "spec_prec_L3Firm-S": 0.4927536231884058, "spec_recall_L3Firm-S": 0.1642512077294686, "spec_f1_L4Quanti": 0.0, "spec_prec_L4Quanti": 0.0, "spec_recall_L4Quanti": 0.0, "spec_qwk": 0.24233251808742773, "spec_mae": 0.8733333333333333, "spec_kripp_alpha": 0.2761091078775676 }, "Opus-4.6": { "cat_macro_f1": 0.35763512449392704, "cat_weighted_f1": 0.40173099854659305, "cat_macro_precision": 0.6354693148020794, "cat_macro_recall": 0.39500680662311666, "cat_mcc": 0.42166882753874363, "cat_auc": 0.9209441610065957, "cat_ece": 0.09567970824738346, "cat_confusion_matrix": [ [ 141, 0, 0, 0, 71, 0, 2 ], [ 0, 0, 0, 4, 65, 0, 10 ], [ 5, 0, 21, 1, 131, 0, 0 ], [ 1, 0, 0, 5, 128, 0, 7 ], [ 0, 0, 0, 0, 194, 0, 19 ], [ 0, 0, 0, 0, 186, 28, 14 ], [ 0, 0, 0, 0, 16, 0, 151 ] ], "cat_f1_BoardGov": 0.7811634349030471, "cat_prec_BoardGov": 0.9591836734693877, "cat_recall_BoardGov": 0.6588785046728972, "cat_f1_Incident": 0.0, "cat_prec_Incident": 0.0, "cat_recall_Incident": 0.0, "cat_f1_Manageme": 0.2346368715083799, "cat_prec_Manageme": 1.0, "cat_recall_Manageme": 0.13291139240506328, "cat_f1_NoneOthe": 0.06622516556291391, "cat_prec_NoneOthe": 0.5, "cat_recall_NoneOthe": 0.03546099290780142, "cat_f1_RiskMana": 0.38645418326693226, "cat_prec_RiskMana": 0.24525916561314792, "cat_recall_RiskMana": 0.9107981220657277, "cat_f1_Strategy": 0.21875, "cat_prec_Strategy": 1.0, "cat_recall_Strategy": 0.12280701754385964, "cat_f1_Third-Pa": 0.8162162162162162, "cat_prec_Third-Pa": 0.7438423645320197, "cat_recall_Third-Pa": 0.9041916167664671, "cat_kripp_alpha": 0.27338793761748126, "spec_macro_f1": 0.20754679251319788, "spec_weighted_f1": 0.25637242485646744, "spec_macro_precision": 0.40946072005380696, "spec_macro_recall": 0.33929593134138586, "spec_mcc": 0.2041103760829744, "spec_auc": 0.8271022317290393, "spec_ece": 0.4489923599362374, "spec_confusion_matrix": [ [ 130, 473, 2, 0 ], [ 0, 145, 0, 0 ], [ 6, 217, 37, 0 ], [ 0, 160, 30, 0 ] ], "spec_f1_L1Generi": 0.3508771929824561, "spec_prec_L1Generi": 0.9558823529411765, "spec_recall_L1Generi": 0.21487603305785125, "spec_f1_L2Domain": 0.2543859649122807, "spec_prec_L2Domain": 0.1457286432160804, "spec_recall_L2Domain": 1.0, "spec_f1_L3Firm-S": 0.22492401215805471, "spec_prec_L3Firm-S": 0.5362318840579711, "spec_recall_L3Firm-S": 0.1423076923076923, "spec_f1_L4Quanti": 0.0, "spec_prec_L4Quanti": 0.0, "spec_recall_L4Quanti": 0.0, "spec_qwk": 0.24096533359991634, "spec_mae": 0.88, "spec_kripp_alpha": 0.2758412395136435 }, "_runtime": { "encoder_mb": 274.843904, "ms_per_sample": 5.860076693982895, "throughput_per_s": 170.64623079537446, "peak_vram_mb": 1287.34326171875, "build_s": 0.4858604749897495 } }