{ "iter1-clspool_vs_GPT-5.4": { "cat_macro_f1": 0.9296272782528762, "cat_weighted_f1": 0.9306824376807155, "cat_macro_precision": 0.9289887550616817, "cat_macro_recall": 0.9334375025997984, "cat_mcc": 0.9179226636085169, "cat_auc": 0.9911299127522846, "cat_ece": 0.05557066917419438, "cat_confusion_matrix": [ [ 217, 0, 8, 3, 2, 0, 0 ], [ 0, 83, 0, 2, 2, 1, 0 ], [ 2, 0, 144, 1, 3, 0, 0 ], [ 1, 0, 2, 132, 1, 0, 0 ], [ 6, 1, 5, 17, 167, 1, 1 ], [ 0, 2, 1, 8, 2, 208, 0 ], [ 0, 0, 0, 1, 11, 0, 165 ] ], "cat_f1_BoardGov": 0.9517543859649122, "cat_prec_BoardGov": 0.9601769911504425, "cat_recall_BoardGov": 0.9434782608695652, "cat_f1_Incident": 0.9540229885057471, "cat_prec_Incident": 0.9651162790697675, "cat_recall_Incident": 0.9431818181818182, "cat_f1_Manageme": 0.9290322580645162, "cat_prec_Manageme": 0.9, "cat_recall_Manageme": 0.96, "cat_f1_NoneOthe": 0.88, "cat_prec_NoneOthe": 0.8048780487804879, "cat_recall_NoneOthe": 0.9705882352941176, "cat_f1_RiskMana": 0.8652849740932642, "cat_prec_RiskMana": 0.8882978723404256, "cat_recall_RiskMana": 0.8434343434343434, "cat_f1_Strategy": 0.9651972157772621, "cat_prec_Strategy": 0.9904761904761905, "cat_recall_Strategy": 0.9411764705882353, "cat_f1_Third-Pa": 0.9620991253644315, "cat_prec_Third-Pa": 0.9939759036144579, "cat_recall_Third-Pa": 0.9322033898305084, "cat_kripp_alpha": 0.9174669822467758, "spec_macro_f1": 0.892010224838834, "spec_weighted_f1": 0.9098424770121019, "spec_macro_precision": 0.9042493173083448, "spec_macro_recall": 0.8836163792237031, "spec_mcc": 0.8634241541671751, "spec_auc": 0.9777836963763646, "spec_ece": 0.07659540871779125, "spec_confusion_matrix": [ [ 587, 11, 17, 3 ], [ 32, 125, 9, 2 ], [ 14, 4, 187, 2 ], [ 3, 1, 9, 194 ] ], "spec_f1_L1Generi": 0.9362041467304625, "spec_prec_L1Generi": 0.9229559748427673, "spec_recall_L1Generi": 0.9498381877022654, "spec_f1_L2Domain": 0.8090614886731392, "spec_prec_L2Domain": 0.8865248226950354, "spec_recall_L2Domain": 0.7440476190476191, "spec_f1_L3Firm-S": 0.8717948717948718, "spec_prec_L3Firm-S": 0.8423423423423423, "spec_recall_L3Firm-S": 0.9033816425120773, "spec_f1_L4Quanti": 0.9509803921568627, "spec_prec_L4Quanti": 0.9651741293532339, "spec_recall_L4Quanti": 0.9371980676328503, "spec_qwk": 0.9224750079938221, "spec_mae": 0.1275, "spec_kripp_alpha": 0.9099809044589873, "total_time_s": 6.83874113188358, "num_samples": 1200, "avg_ms_per_sample": 5.698950943236317, "combined_macro_f1": 0.910818751545855 }, "iter1-clspool_vs_Opus-4.6": { "cat_macro_f1": 0.9228949790380195, "cat_weighted_f1": 0.9228190044594041, "cat_macro_precision": 0.9183239817151002, "cat_macro_recall": 0.9310538134995027, "cat_mcc": 0.9101930161599978, "cat_auc": 0.9924519781241848, "cat_ece": 0.06223733584086104, "cat_confusion_matrix": [ [ 208, 0, 3, 3, 0, 0, 0 ], [ 0, 76, 0, 1, 2, 0, 0 ], [ 5, 0, 147, 1, 4, 0, 1 ], [ 0, 0, 0, 139, 2, 0, 0 ], [ 12, 1, 9, 14, 171, 1, 5 ], [ 1, 9, 1, 6, 2, 208, 1 ], [ 0, 0, 0, 0, 7, 1, 159 ] ], "cat_f1_BoardGov": 0.9454545454545454, "cat_prec_BoardGov": 0.9203539823008849, "cat_recall_BoardGov": 0.9719626168224299, "cat_f1_Incident": 0.9212121212121213, "cat_prec_Incident": 0.8837209302325582, "cat_recall_Incident": 0.9620253164556962, "cat_f1_Manageme": 0.9245283018867925, "cat_prec_Manageme": 0.91875, "cat_recall_Manageme": 0.930379746835443, "cat_f1_NoneOthe": 0.9114754098360656, "cat_prec_NoneOthe": 0.8475609756097561, "cat_recall_NoneOthe": 0.9858156028368794, "cat_f1_RiskMana": 0.8528678304239401, "cat_prec_RiskMana": 0.9095744680851063, "cat_recall_RiskMana": 0.8028169014084507, "cat_f1_Strategy": 0.9497716894977168, "cat_prec_Strategy": 0.9904761904761905, "cat_recall_Strategy": 0.9122807017543859, "cat_f1_Third-Pa": 0.954954954954955, "cat_prec_Third-Pa": 0.9578313253012049, "cat_recall_Third-Pa": 0.9520958083832335, "cat_kripp_alpha": 0.9095735484151157, "spec_macro_f1": 0.8804386286358235, "spec_weighted_f1": 0.8975676999782217, "spec_macro_precision": 0.8892226854649037, "spec_macro_recall": 0.8750457181821643, "spec_mcc": 0.8465565454059848, "spec_auc": 0.9697722386763277, "spec_ece": 0.08741456707318629, "spec_confusion_matrix": [ [ 575, 19, 10, 1 ], [ 26, 114, 4, 1 ], [ 35, 8, 204, 13 ], [ 0, 0, 4, 186 ] ], "spec_f1_L1Generi": 0.9266720386784851, "spec_prec_L1Generi": 0.9040880503144654, "spec_recall_L1Generi": 0.9504132231404959, "spec_f1_L2Domain": 0.7972027972027972, "spec_prec_L2Domain": 0.8085106382978723, "spec_recall_L2Domain": 0.7862068965517242, "spec_f1_L3Firm-S": 0.8464730290456431, "spec_prec_L3Firm-S": 0.918918918918919, "spec_recall_L3Firm-S": 0.7846153846153846, "spec_f1_L4Quanti": 0.9514066496163683, "spec_prec_L4Quanti": 0.9253731343283582, "spec_recall_L4Quanti": 0.9789473684210527, "spec_qwk": 0.9187882106031572, "spec_mae": 0.14083333333333334, "spec_kripp_alpha": 0.9041056117796359, "total_time_s": 6.83874113188358, "num_samples": 1200, "avg_ms_per_sample": 5.698950943236317, "combined_macro_f1": 0.9016668038369215 } }