{ "iter1-independent_vs_GPT-5.4": { "cat_macro_f1": 0.9336741161693523, "cat_weighted_f1": 0.9343162998643407, "cat_macro_precision": 0.93189297179766, "cat_macro_recall": 0.9377918652022429, "cat_mcc": 0.9226990724708704, "cat_auc": 0.991991833154947, "cat_ece": 0.053848127176364245, "cat_confusion_matrix": [ [ 225, 0, 3, 0, 2, 0, 0 ], [ 0, 85, 0, 0, 2, 1, 0 ], [ 2, 0, 144, 1, 3, 0, 0 ], [ 0, 0, 3, 131, 0, 2, 0 ], [ 6, 1, 5, 19, 164, 1, 2 ], [ 0, 3, 1, 8, 2, 207, 0 ], [ 0, 0, 0, 0, 12, 0, 165 ] ], "cat_f1_BoardGov": 0.9719222462203023, "cat_prec_BoardGov": 0.9656652360515021, "cat_recall_BoardGov": 0.9782608695652174, "cat_f1_Incident": 0.96045197740113, "cat_prec_Incident": 0.9550561797752809, "cat_recall_Incident": 0.9659090909090909, "cat_f1_Manageme": 0.9411764705882353, "cat_prec_Manageme": 0.9230769230769231, "cat_recall_Manageme": 0.96, "cat_f1_NoneOthe": 0.888135593220339, "cat_prec_NoneOthe": 0.8238993710691824, "cat_recall_NoneOthe": 0.9632352941176471, "cat_f1_RiskMana": 0.856396866840731, "cat_prec_RiskMana": 0.8864864864864865, "cat_recall_RiskMana": 0.8282828282828283, "cat_f1_Strategy": 0.9583333333333334, "cat_prec_Strategy": 0.981042654028436, "cat_recall_Strategy": 0.9366515837104072, "cat_f1_Third-Pa": 0.9593023255813954, "cat_prec_Third-Pa": 0.9880239520958084, "cat_recall_Third-Pa": 0.9322033898305084, "cat_kripp_alpha": 0.9223591517560865, "spec_macro_f1": 0.8951731906425856, "spec_weighted_f1": 0.9121524819510628, "spec_macro_precision": 0.8980417155129858, "spec_macro_recall": 0.8930560580782194, "spec_mcc": 0.866381831963237, "spec_auc": 0.981666223606385, "spec_ece": 0.07135417198141418, "spec_confusion_matrix": [ [ 580, 23, 12, 3 ], [ 29, 130, 7, 2 ], [ 11, 4, 190, 2 ], [ 2, 1, 9, 195 ] ], "spec_f1_L1Generi": 0.9354838709677419, "spec_prec_L1Generi": 0.932475884244373, "spec_recall_L1Generi": 0.9385113268608414, "spec_f1_L2Domain": 0.7975460122699386, "spec_prec_L2Domain": 0.8227848101265823, "spec_recall_L2Domain": 0.7738095238095238, "spec_f1_L3Firm-S": 0.8941176470588236, "spec_prec_L3Firm-S": 0.8715596330275229, "spec_recall_L3Firm-S": 0.9178743961352657, "spec_f1_L4Quanti": 0.9535452322738386, "spec_prec_L4Quanti": 0.9653465346534653, "spec_recall_L4Quanti": 0.9420289855072463, "spec_qwk": 0.9324447137231142, "spec_mae": 0.1175, "spec_kripp_alpha": 0.917725722448833, "total_time_s": 6.732117835083045, "num_samples": 1200, "avg_ms_per_sample": 5.6100981959025376, "combined_macro_f1": 0.9144236534059689 }, "iter1-independent_vs_Opus-4.6": { "cat_macro_f1": 0.922684387023173, "cat_weighted_f1": 0.9216414809666168, "cat_macro_precision": 0.9177680939029339, "cat_macro_recall": 0.9316060900094703, "cat_mcc": 0.909266938399113, "cat_auc": 0.9939660707189948, "cat_ece": 0.06551479384303091, "cat_confusion_matrix": [ [ 211, 0, 1, 1, 1, 0, 0 ], [ 0, 78, 0, 0, 1, 0, 0 ], [ 8, 0, 144, 1, 4, 0, 1 ], [ 0, 0, 1, 138, 1, 1, 0 ], [ 13, 0, 9, 14, 169, 1, 7 ], [ 1, 11, 1, 4, 3, 208, 0 ], [ 0, 0, 0, 1, 6, 1, 159 ] ], "cat_f1_BoardGov": 0.9440715883668904, "cat_prec_BoardGov": 0.9055793991416309, "cat_recall_BoardGov": 0.985981308411215, "cat_f1_Incident": 0.9285714285714286, "cat_prec_Incident": 0.8764044943820225, "cat_recall_Incident": 0.9873417721518988, "cat_f1_Manageme": 0.9171974522292994, "cat_prec_Manageme": 0.9230769230769231, "cat_recall_Manageme": 0.9113924050632911, "cat_f1_NoneOthe": 0.92, "cat_prec_NoneOthe": 0.8679245283018868, "cat_recall_NoneOthe": 0.9787234042553191, "cat_f1_RiskMana": 0.8492462311557789, "cat_prec_RiskMana": 0.9135135135135135, "cat_recall_RiskMana": 0.7934272300469484, "cat_f1_Strategy": 0.9476082004555809, "cat_prec_Strategy": 0.985781990521327, "cat_recall_Strategy": 0.9122807017543859, "cat_f1_Third-Pa": 0.9520958083832335, "cat_prec_Third-Pa": 0.9520958083832335, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.908575631724203, "spec_macro_f1": 0.8833694419146193, "spec_weighted_f1": 0.9004034318676798, "spec_macro_precision": 0.8858989636247611, "spec_macro_recall": 0.8854684685880032, "spec_mcc": 0.8500778641433316, "spec_auc": 0.9736633898988131, "spec_ece": 0.08248284702499709, "spec_confusion_matrix": [ [ 567, 30, 7, 1 ], [ 22, 118, 3, 2 ], [ 33, 10, 207, 10 ], [ 0, 0, 1, 189 ] ], "spec_f1_L1Generi": 0.9242053789731052, "spec_prec_L1Generi": 0.9115755627009646, "spec_recall_L1Generi": 0.9371900826446281, "spec_f1_L2Domain": 0.7788778877887789, "spec_prec_L2Domain": 0.7468354430379747, "spec_recall_L2Domain": 0.8137931034482758, "spec_f1_L3Firm-S": 0.8661087866108786, "spec_prec_L3Firm-S": 0.9495412844036697, "spec_recall_L3Firm-S": 0.7961538461538461, "spec_f1_L4Quanti": 0.9642857142857143, "spec_prec_L4Quanti": 0.9356435643564357, "spec_recall_L4Quanti": 0.9947368421052631, "spec_qwk": 0.9227008860372746, "spec_mae": 0.13583333333333333, "spec_kripp_alpha": 0.9065248741550552, "total_time_s": 6.732117835083045, "num_samples": 1200, "avg_ms_per_sample": 5.6100981959025376, "combined_macro_f1": 0.9030269144688962 } }