{ "dictionary_vs_GPT-5.4": { "cat_macro_f1": 0.5562709796995989, "cat_weighted_f1": 0.586654770315343, "cat_macro_precision": 0.5820642365150382, "cat_macro_recall": 0.559253048500957, "cat_mcc": 0.5159948841699565, "cat_auc": 0.7450329775506974, "cat_ece": 0.4141666666666667, "cat_confusion_matrix": [ [ 177, 1, 23, 3, 19, 1, 6 ], [ 1, 41, 2, 8, 16, 10, 10 ], [ 13, 2, 83, 3, 40, 1, 8 ], [ 3, 27, 0, 33, 44, 14, 15 ], [ 15, 12, 11, 7, 94, 0, 59 ], [ 1, 20, 0, 4, 34, 129, 33 ], [ 0, 5, 0, 18, 6, 2, 146 ] ], "cat_f1_BoardGov": 0.8045454545454546, "cat_prec_BoardGov": 0.8428571428571429, "cat_recall_BoardGov": 0.7695652173913043, "cat_f1_Incident": 0.41836734693877553, "cat_prec_Incident": 0.37962962962962965, "cat_recall_Incident": 0.4659090909090909, "cat_f1_Manageme": 0.6171003717472119, "cat_prec_Manageme": 0.6974789915966386, "cat_recall_Manageme": 0.5533333333333333, "cat_f1_NoneOthe": 0.3113207547169811, "cat_prec_NoneOthe": 0.4342105263157895, "cat_recall_NoneOthe": 0.2426470588235294, "cat_f1_RiskMana": 0.41685144124168516, "cat_prec_RiskMana": 0.3715415019762846, "cat_recall_RiskMana": 0.47474747474747475, "cat_f1_Strategy": 0.6825396825396826, "cat_prec_Strategy": 0.821656050955414, "cat_recall_Strategy": 0.583710407239819, "cat_f1_Third-Pa": 0.6431718061674009, "cat_prec_Third-Pa": 0.5270758122743683, "cat_recall_Third-Pa": 0.8248587570621468, "cat_kripp_alpha": 0.509166416578055, "spec_macro_f1": 0.6554577856007078, "spec_weighted_f1": 0.709500413776473, "spec_macro_precision": 0.7204439491998363, "spec_macro_recall": 0.6226176238048335, "spec_mcc": 0.5554600287825188, "spec_auc": 0.7506681772561045, "spec_ece": 0.28, "spec_confusion_matrix": [ [ 554, 27, 4, 33 ], [ 75, 86, 2, 5 ], [ 87, 16, 104, 0 ], [ 48, 25, 14, 120 ] ], "spec_f1_L1Generi": 0.8017366136034733, "spec_prec_L1Generi": 0.725130890052356, "spec_recall_L1Generi": 0.8964401294498382, "spec_f1_L2Domain": 0.5341614906832298, "spec_prec_L2Domain": 0.5584415584415584, "spec_recall_L2Domain": 0.5119047619047619, "spec_f1_L3Firm-S": 0.6283987915407855, "spec_prec_L3Firm-S": 0.8387096774193549, "spec_recall_L3Firm-S": 0.5024154589371981, "spec_f1_L4Quanti": 0.6575342465753424, "spec_prec_L4Quanti": 0.759493670886076, "spec_recall_L4Quanti": 0.5797101449275363, "spec_qwk": 0.5756972488045813, "spec_mae": 0.5158333333333334, "spec_kripp_alpha": 0.559449580800123, "num_samples": 1200, "total_time_s": 0.0, "avg_ms_per_sample": 0.001, "combined_macro_f1": 0.6058643826501533 }, "dictionary_vs_Opus-4.6": { "cat_macro_f1": 0.5404608035704013, "cat_weighted_f1": 0.5680942824830456, "cat_macro_precision": 0.564206294840196, "cat_macro_recall": 0.5502937128850568, "cat_mcc": 0.49808632770596933, "cat_auc": 0.7391875463755565, "cat_ece": 0.43000000000000005, "cat_confusion_matrix": [ [ 162, 1, 22, 3, 21, 1, 4 ], [ 1, 37, 2, 8, 16, 6, 9 ], [ 20, 1, 85, 6, 37, 1, 8 ], [ 3, 32, 0, 29, 46, 14, 17 ], [ 22, 12, 10, 7, 97, 0, 65 ], [ 2, 21, 0, 5, 34, 133, 33 ], [ 0, 4, 0, 18, 2, 2, 141 ] ], "cat_f1_BoardGov": 0.7641509433962265, "cat_prec_BoardGov": 0.7714285714285715, "cat_recall_BoardGov": 0.7570093457943925, "cat_f1_Incident": 0.39572192513368987, "cat_prec_Incident": 0.3425925925925926, "cat_recall_Incident": 0.46835443037974683, "cat_f1_Manageme": 0.6137184115523465, "cat_prec_Manageme": 0.7142857142857143, "cat_recall_Manageme": 0.5379746835443038, "cat_f1_NoneOthe": 0.2672811059907834, "cat_prec_NoneOthe": 0.3815789473684211, "cat_recall_NoneOthe": 0.20567375886524822, "cat_f1_RiskMana": 0.41630901287553645, "cat_prec_RiskMana": 0.383399209486166, "cat_recall_RiskMana": 0.45539906103286387, "cat_f1_Strategy": 0.6909090909090909, "cat_prec_Strategy": 0.8471337579617835, "cat_recall_Strategy": 0.5833333333333334, "cat_f1_Third-Pa": 0.6351351351351351, "cat_prec_Third-Pa": 0.5090252707581228, "cat_recall_Third-Pa": 0.844311377245509, "cat_kripp_alpha": 0.49046948704650417, "spec_macro_f1": 0.6345038647761864, "spec_weighted_f1": 0.6901912617666649, "spec_macro_precision": 0.7050601461353045, "spec_macro_recall": 0.6128856912762208, "spec_mcc": 0.5373481008745777, "spec_auc": 0.7435001662825611, "spec_ece": 0.29666666666666663, "spec_confusion_matrix": [ [ 542, 33, 3, 27 ], [ 66, 73, 1, 5 ], [ 121, 26, 108, 5 ], [ 35, 22, 12, 121 ] ], "spec_f1_L1Generi": 0.7918188458729, "spec_prec_L1Generi": 0.7094240837696335, "spec_recall_L1Generi": 0.8958677685950414, "spec_f1_L2Domain": 0.4882943143812709, "spec_prec_L2Domain": 0.474025974025974, "spec_recall_L2Domain": 0.503448275862069, "spec_f1_L3Firm-S": 0.5625, "spec_prec_L3Firm-S": 0.8709677419354839, "spec_recall_L3Firm-S": 0.4153846153846154, "spec_f1_L4Quanti": 0.6954022988505747, "spec_prec_L4Quanti": 0.7658227848101266, "spec_recall_L4Quanti": 0.6368421052631579, "spec_qwk": 0.5875343721356554, "spec_mae": 0.5258333333333334, "spec_kripp_alpha": 0.562049085880076, "num_samples": 1200, "total_time_s": 0.0, "avg_ms_per_sample": 0.001, "combined_macro_f1": 0.5874823341732938 } }