[ { "variant": "fp32", "description": "Float32 encoder + heads", "encoder_mb": 1579.127296, "ms_per_sample": 16.293709366727853, "throughput_per_s": 61.37337898281309, "peak_vram_mb": 3503.53369140625, "build_s": 0.6251941699883901, "GPT-5.4_cat_f1": 0.9336741161693523, "GPT-5.4_spec_f1": 0.8943486525770918, "GPT-5.4_cat_mcc": 0.9226990724708704, "GPT-5.4_spec_qwk": 0.9321211092744079, "GPT-5.4_spec_mae": 0.11833333333333333, "GPT-5.4_cat_ece": 0.05388230005900064, "GPT-5.4_spec_ece": 0.07088303024570146, "GPT-5.4_spec_f1_L1Generi": 0.9346246973365617, "GPT-5.4_spec_f1_L2Domain": 0.7951070336391437, "GPT-5.4_spec_f1_L3Firm-S": 0.8941176470588236, "GPT-5.4_spec_f1_L4Quanti": 0.9535452322738386, "Opus-4.6_cat_f1": 0.922684387023173, "Opus-4.6_spec_f1": 0.8825095464914274, "Opus-4.6_cat_mcc": 0.909266938399113, "Opus-4.6_spec_qwk": 0.9223702541559166, "Opus-4.6_spec_mae": 0.13666666666666666, "Opus-4.6_cat_ece": 0.06541596949100496, "Opus-4.6_spec_ece": 0.08238246644536655, "Opus-4.6_spec_f1_L1Generi": 0.9233278955954323, "Opus-4.6_spec_f1_L2Domain": 0.7763157894736842, "Opus-4.6_spec_f1_L3Firm-S": 0.8661087866108786, "Opus-4.6_spec_f1_L4Quanti": 0.9642857142857143 }, { "variant": "bf16", "description": "BFloat16 baseline (matches eval pipeline)", "encoder_mb": 789.563648, "ms_per_sample": 5.516677870764397, "throughput_per_s": 181.26851402716375, "peak_vram_mb": 1740.83837890625, "build_s": 0.48778308398323134, "GPT-5.4_cat_f1": 0.9336741161693523, "GPT-5.4_spec_f1": 0.8951731906425856, "GPT-5.4_cat_mcc": 0.9226990724708704, "GPT-5.4_spec_qwk": 0.9324447137231142, "GPT-5.4_spec_mae": 0.1175, "GPT-5.4_cat_ece": 0.053848127176364245, "GPT-5.4_spec_ece": 0.07135417198141418, "GPT-5.4_spec_f1_L1Generi": 0.9354838709677419, "GPT-5.4_spec_f1_L2Domain": 0.7975460122699386, "GPT-5.4_spec_f1_L3Firm-S": 0.8941176470588236, "GPT-5.4_spec_f1_L4Quanti": 0.9535452322738386, "Opus-4.6_cat_f1": 0.922684387023173, "Opus-4.6_spec_f1": 0.8833694419146193, "Opus-4.6_cat_mcc": 0.909266938399113, "Opus-4.6_spec_qwk": 0.9227008860372746, "Opus-4.6_spec_mae": 0.13583333333333333, "Opus-4.6_cat_ece": 0.06551479384303091, "Opus-4.6_spec_ece": 0.08248284702499709, "Opus-4.6_spec_f1_L1Generi": 0.9242053789731052, "Opus-4.6_spec_f1_L2Domain": 0.7788778877887789, "Opus-4.6_spec_f1_L3Firm-S": 0.8661087866108786, "Opus-4.6_spec_f1_L4Quanti": 0.9642857142857143 }, { "variant": "fp16", "description": "Float16 encoder + heads", "encoder_mb": 789.563648, "ms_per_sample": 5.539002780715236, "throughput_per_s": 180.53791261517884, "peak_vram_mb": 1740.83837890625, "build_s": 0.46582157304510474, "GPT-5.4_cat_f1": 0.9336741161693523, "GPT-5.4_spec_f1": 0.8951731906425856, "GPT-5.4_cat_mcc": 0.9226990724708704, "GPT-5.4_spec_qwk": 0.9324447137231142, "GPT-5.4_spec_mae": 0.1175, "GPT-5.4_cat_ece": 0.053747650533914546, "GPT-5.4_spec_ece": 0.07004868157207966, "GPT-5.4_spec_f1_L1Generi": 0.9354838709677419, "GPT-5.4_spec_f1_L2Domain": 0.7975460122699386, "GPT-5.4_spec_f1_L3Firm-S": 0.8941176470588236, "GPT-5.4_spec_f1_L4Quanti": 0.9535452322738386, "Opus-4.6_cat_f1": 0.922684387023173, "Opus-4.6_spec_f1": 0.8833694419146193, "Opus-4.6_cat_mcc": 0.909266938399113, "Opus-4.6_spec_qwk": 0.9227008860372746, "Opus-4.6_spec_mae": 0.13583333333333333, "Opus-4.6_cat_ece": 0.06541431720058125, "Opus-4.6_spec_ece": 0.0816012116521597, "Opus-4.6_spec_f1_L1Generi": 0.9242053789731052, "Opus-4.6_spec_f1_L2Domain": 0.7788778877887789, "Opus-4.6_spec_f1_L3Firm-S": 0.8661087866108786, "Opus-4.6_spec_f1_L4Quanti": 0.9642857142857143 }, { "variant": "torchao-int8-wo", "description": "torchao Int8 weight-only on encoder linears", "encoder_mb": 789.563648, "ms_per_sample": 6.078403938445263, "throughput_per_s": 164.5168715549004, "peak_vram_mb": 1416.36376953125, "build_s": 0.5027359619853087, "GPT-5.4_cat_f1": 0.9344870894825886, "GPT-5.4_spec_f1": 0.8941203230194683, "GPT-5.4_cat_mcc": 0.9237006314618685, "GPT-5.4_spec_qwk": 0.9329693660903852, "GPT-5.4_spec_mae": 0.1175, "GPT-5.4_cat_ece": 0.05415941931307314, "GPT-5.4_spec_ece": 0.06980206420024232, "GPT-5.4_spec_f1_L1Generi": 0.9353796445880452, "GPT-5.4_spec_f1_L2Domain": 0.793939393939394, "GPT-5.4_spec_f1_L3Firm-S": 0.8936170212765957, "GPT-5.4_spec_f1_L4Quanti": 0.9535452322738386, "Opus-4.6_cat_f1": 0.9234810481200378, "Opus-4.6_spec_f1": 0.8814731397444973, "Opus-4.6_cat_mcc": 0.9102750101817324, "Opus-4.6_spec_qwk": 0.9207708779443254, "Opus-4.6_spec_mae": 0.13916666666666666, "Opus-4.6_cat_ece": 0.0641141641388337, "Opus-4.6_spec_ece": 0.08370273689428968, "Opus-4.6_spec_f1_L1Generi": 0.9208163265306123, "Opus-4.6_spec_f1_L2Domain": 0.7752442996742671, "Opus-4.6_spec_f1_L3Firm-S": 0.865546218487395, "Opus-4.6_spec_f1_L4Quanti": 0.9642857142857143 }, { "variant": "torchao-int8-dyn", "description": "torchao Int8 dynamic activation + Int8 weight on encoder", "encoder_mb": 789.563648, "ms_per_sample": 9.671733896636093, "throughput_per_s": 103.39407707937539, "peak_vram_mb": 1774.27392578125, "build_s": 0.4831273259478621, "GPT-5.4_cat_f1": 0.9336475878058536, "GPT-5.4_spec_f1": 0.8918479759675974, "GPT-5.4_cat_mcc": 0.9226968780743573, "GPT-5.4_spec_qwk": 0.931514217618119, "GPT-5.4_spec_mae": 0.12, "GPT-5.4_cat_ece": 0.05363284418980283, "GPT-5.4_spec_ece": 0.07049367701013878, "GPT-5.4_spec_f1_L1Generi": 0.934412955465587, "GPT-5.4_spec_f1_L2Domain": 0.7889908256880734, "GPT-5.4_spec_f1_L3Firm-S": 0.8904428904428905, "GPT-5.4_spec_f1_L4Quanti": 0.9535452322738386, "Opus-4.6_cat_f1": 0.9242573204255528, "Opus-4.6_spec_f1": 0.8827245859621925, "Opus-4.6_cat_mcc": 0.9112549308356716, "Opus-4.6_spec_qwk": 0.92235918049198, "Opus-4.6_spec_mae": 0.13666666666666666, "Opus-4.6_cat_ece": 0.06330573419729862, "Opus-4.6_spec_ece": 0.08290670409798626, "Opus-4.6_spec_f1_L1Generi": 0.9230769230769231, "Opus-4.6_spec_f1_L2Domain": 0.7763157894736842, "Opus-4.6_spec_f1_L3Firm-S": 0.8672199170124482, "Opus-4.6_spec_f1_L4Quanti": 0.9642857142857143 }, { "variant": "torchao-int4-wo", "description": "torchao Int4 weight-only (group=128) on encoder", "error": "ImportError: Requires mslk >= 1.0.0" }, { "variant": "bnb-int8", "description": "bitsandbytes LLM.int8 on encoder linears", "encoder_mb": 789.563648, "ms_per_sample": 7.762363941583317, "throughput_per_s": 128.82673468103667, "peak_vram_mb": 2135.203125, "build_s": 1.1878160500200465, "GPT-5.4_cat_f1": 0.9360988760303737, "GPT-5.4_spec_f1": 0.8986323186392307, "GPT-5.4_cat_mcc": 0.9256911778959798, "GPT-5.4_spec_qwk": 0.9307948020550015, "GPT-5.4_spec_mae": 0.1175, "GPT-5.4_cat_ece": 0.052939765204985965, "GPT-5.4_spec_ece": 0.06740866973996164, "GPT-5.4_spec_f1_L1Generi": 0.9364440868865648, "GPT-5.4_spec_f1_L2Domain": 0.8099688473520249, "GPT-5.4_spec_f1_L3Firm-S": 0.892018779342723, "GPT-5.4_spec_f1_L4Quanti": 0.9560975609756097, "Opus-4.6_cat_f1": 0.9235105849558979, "Opus-4.6_spec_f1": 0.8826923642825633, "Opus-4.6_cat_mcc": 0.9103198007176273, "Opus-4.6_spec_qwk": 0.9198415117342273, "Opus-4.6_spec_mae": 0.13916666666666666, "Opus-4.6_cat_ece": 0.06465620135267579, "Opus-4.6_spec_ece": 0.083350846717755, "Opus-4.6_spec_f1_L1Generi": 0.9235772357723577, "Opus-4.6_spec_f1_L2Domain": 0.785234899328859, "Opus-4.6_spec_f1_L3Firm-S": 0.860125260960334, "Opus-4.6_spec_f1_L4Quanti": 0.9618320610687023 }, { "variant": "bnb-nf4", "description": "bitsandbytes NF4 4-bit (double-quant, bf16 compute)", "encoder_mb": 274.843904, "ms_per_sample": 5.860076693982895, "throughput_per_s": 170.64623079537446, "peak_vram_mb": 1287.34326171875, "build_s": 0.4858604749897495, "GPT-5.4_cat_f1": 0.3536909012886116, "GPT-5.4_spec_f1": 0.22049451330952025, "GPT-5.4_cat_mcc": 0.42471542150657926, "GPT-5.4_spec_qwk": 0.24233251808742773, "GPT-5.4_spec_mae": 0.8733333333333333, "GPT-5.4_cat_ece": 0.09734637491405013, "GPT-5.4_spec_ece": 0.43363295723994577, "GPT-5.4_spec_f1_L1Generi": 0.35013262599469497, "GPT-5.4_spec_f1_L2Domain": 0.28546861564918313, "GPT-5.4_spec_f1_L3Firm-S": 0.2463768115942029, "GPT-5.4_spec_f1_L4Quanti": 0.0, "Opus-4.6_cat_f1": 0.35763512449392704, "Opus-4.6_spec_f1": 0.20754679251319788, "Opus-4.6_cat_mcc": 0.42166882753874363, "Opus-4.6_spec_qwk": 0.24096533359991634, "Opus-4.6_spec_mae": 0.88, "Opus-4.6_cat_ece": 0.09567970824738346, "Opus-4.6_spec_ece": 0.4489923599362374, "Opus-4.6_spec_f1_L1Generi": 0.3508771929824561, "Opus-4.6_spec_f1_L2Domain": 0.2543859649122807, "Opus-4.6_spec_f1_L3Firm-S": 0.22492401215805471, "Opus-4.6_spec_f1_L4Quanti": 0.0 }, { "variant": "bnb-nf4-nodq", "description": "bitsandbytes NF4 4-bit (no double-quant)", "encoder_mb": 274.843904, "ms_per_sample": 5.861402786540566, "throughput_per_s": 170.607623536175, "peak_vram_mb": 1287.34326171875, "build_s": 0.4908116469741799, "GPT-5.4_cat_f1": 0.3536909012886116, "GPT-5.4_spec_f1": 0.22049451330952025, "GPT-5.4_cat_mcc": 0.42471542150657926, "GPT-5.4_spec_qwk": 0.24233251808742773, "GPT-5.4_spec_mae": 0.8733333333333333, "GPT-5.4_cat_ece": 0.09734637491405013, "GPT-5.4_spec_ece": 0.43363295723994577, "GPT-5.4_spec_f1_L1Generi": 0.35013262599469497, "GPT-5.4_spec_f1_L2Domain": 0.28546861564918313, "GPT-5.4_spec_f1_L3Firm-S": 0.2463768115942029, "GPT-5.4_spec_f1_L4Quanti": 0.0, "Opus-4.6_cat_f1": 0.35763512449392704, "Opus-4.6_spec_f1": 0.20754679251319788, "Opus-4.6_cat_mcc": 0.42166882753874363, "Opus-4.6_spec_qwk": 0.24096533359991634, "Opus-4.6_spec_mae": 0.88, "Opus-4.6_cat_ece": 0.09567970824738346, "Opus-4.6_spec_ece": 0.4489923599362374, "Opus-4.6_spec_f1_L1Generi": 0.3508771929824561, "Opus-4.6_spec_f1_L2Domain": 0.2543859649122807, "Opus-4.6_spec_f1_L3Firm-S": 0.22492401215805471, "Opus-4.6_spec_f1_L4Quanti": 0.0 }, { "variant": "bnb-fp4", "description": "bitsandbytes FP4 4-bit (no double-quant)", "encoder_mb": 274.843904, "ms_per_sample": 5.865302347471394, "throughput_per_s": 170.49419463109393, "peak_vram_mb": 1287.34326171875, "build_s": 0.4887635139748454, "GPT-5.4_cat_f1": 0.16293893512410998, "GPT-5.4_spec_f1": 0.20854117827130608, "GPT-5.4_cat_mcc": 0.22345796853389935, "GPT-5.4_spec_qwk": 0.2326064604575444, "GPT-5.4_spec_mae": 0.8825, "GPT-5.4_cat_ece": 0.2080524676044782, "GPT-5.4_spec_ece": 0.44289420386155437, "GPT-5.4_spec_f1_L1Generi": 0.35742444152431013, "GPT-5.4_spec_f1_L2Domain": 0.2824956672443674, "GPT-5.4_spec_f1_L3Firm-S": 0.19424460431654678, "GPT-5.4_spec_f1_L4Quanti": 0.0, "Opus-4.6_cat_f1": 0.16861118726256397, "Opus-4.6_spec_f1": 0.19783939283519508, "Opus-4.6_cat_mcc": 0.2251562222131823, "Opus-4.6_spec_qwk": 0.22580295138888895, "Opus-4.6_spec_mae": 0.8925, "Opus-4.6_cat_ece": 0.19888580093781152, "Opus-4.6_spec_ece": 0.45814307530721027, "Opus-4.6_spec_f1_L1Generi": 0.35294117647058826, "Opus-4.6_spec_f1_L2Domain": 0.251105216622458, "Opus-4.6_spec_f1_L3Firm-S": 0.18731117824773413, "Opus-4.6_spec_f1_L4Quanti": 0.0 } ]