298 lines
6.6 KiB
JSON

{
"dictionary_vs_GPT-5.4": {
"cat_macro_f1": 0.5562709796995989,
"cat_weighted_f1": 0.586654770315343,
"cat_macro_precision": 0.5820642365150382,
"cat_macro_recall": 0.559253048500957,
"cat_mcc": 0.5159948841699565,
"cat_auc": 0.7450329775506974,
"cat_ece": 0.4141666666666667,
"cat_confusion_matrix": [
[
177,
1,
23,
3,
19,
1,
6
],
[
1,
41,
2,
8,
16,
10,
10
],
[
13,
2,
83,
3,
40,
1,
8
],
[
3,
27,
0,
33,
44,
14,
15
],
[
15,
12,
11,
7,
94,
0,
59
],
[
1,
20,
0,
4,
34,
129,
33
],
[
0,
5,
0,
18,
6,
2,
146
]
],
"cat_f1_BoardGov": 0.8045454545454546,
"cat_prec_BoardGov": 0.8428571428571429,
"cat_recall_BoardGov": 0.7695652173913043,
"cat_f1_Incident": 0.41836734693877553,
"cat_prec_Incident": 0.37962962962962965,
"cat_recall_Incident": 0.4659090909090909,
"cat_f1_Manageme": 0.6171003717472119,
"cat_prec_Manageme": 0.6974789915966386,
"cat_recall_Manageme": 0.5533333333333333,
"cat_f1_NoneOthe": 0.3113207547169811,
"cat_prec_NoneOthe": 0.4342105263157895,
"cat_recall_NoneOthe": 0.2426470588235294,
"cat_f1_RiskMana": 0.41685144124168516,
"cat_prec_RiskMana": 0.3715415019762846,
"cat_recall_RiskMana": 0.47474747474747475,
"cat_f1_Strategy": 0.6825396825396826,
"cat_prec_Strategy": 0.821656050955414,
"cat_recall_Strategy": 0.583710407239819,
"cat_f1_Third-Pa": 0.6431718061674009,
"cat_prec_Third-Pa": 0.5270758122743683,
"cat_recall_Third-Pa": 0.8248587570621468,
"cat_kripp_alpha": 0.509166416578055,
"spec_macro_f1": 0.6554577856007078,
"spec_weighted_f1": 0.709500413776473,
"spec_macro_precision": 0.7204439491998363,
"spec_macro_recall": 0.6226176238048335,
"spec_mcc": 0.5554600287825188,
"spec_auc": 0.7506681772561045,
"spec_ece": 0.28,
"spec_confusion_matrix": [
[
554,
27,
4,
33
],
[
75,
86,
2,
5
],
[
87,
16,
104,
0
],
[
48,
25,
14,
120
]
],
"spec_f1_L1Generi": 0.8017366136034733,
"spec_prec_L1Generi": 0.725130890052356,
"spec_recall_L1Generi": 0.8964401294498382,
"spec_f1_L2Domain": 0.5341614906832298,
"spec_prec_L2Domain": 0.5584415584415584,
"spec_recall_L2Domain": 0.5119047619047619,
"spec_f1_L3Firm-S": 0.6283987915407855,
"spec_prec_L3Firm-S": 0.8387096774193549,
"spec_recall_L3Firm-S": 0.5024154589371981,
"spec_f1_L4Quanti": 0.6575342465753424,
"spec_prec_L4Quanti": 0.759493670886076,
"spec_recall_L4Quanti": 0.5797101449275363,
"spec_qwk": 0.5756972488045813,
"spec_mae": 0.5158333333333334,
"spec_kripp_alpha": 0.559449580800123,
"num_samples": 1200,
"total_time_s": 0.0,
"avg_ms_per_sample": 0.001,
"combined_macro_f1": 0.6058643826501533
},
"dictionary_vs_Opus-4.6": {
"cat_macro_f1": 0.5404608035704013,
"cat_weighted_f1": 0.5680942824830456,
"cat_macro_precision": 0.564206294840196,
"cat_macro_recall": 0.5502937128850568,
"cat_mcc": 0.49808632770596933,
"cat_auc": 0.7391875463755565,
"cat_ece": 0.43000000000000005,
"cat_confusion_matrix": [
[
162,
1,
22,
3,
21,
1,
4
],
[
1,
37,
2,
8,
16,
6,
9
],
[
20,
1,
85,
6,
37,
1,
8
],
[
3,
32,
0,
29,
46,
14,
17
],
[
22,
12,
10,
7,
97,
0,
65
],
[
2,
21,
0,
5,
34,
133,
33
],
[
0,
4,
0,
18,
2,
2,
141
]
],
"cat_f1_BoardGov": 0.7641509433962265,
"cat_prec_BoardGov": 0.7714285714285715,
"cat_recall_BoardGov": 0.7570093457943925,
"cat_f1_Incident": 0.39572192513368987,
"cat_prec_Incident": 0.3425925925925926,
"cat_recall_Incident": 0.46835443037974683,
"cat_f1_Manageme": 0.6137184115523465,
"cat_prec_Manageme": 0.7142857142857143,
"cat_recall_Manageme": 0.5379746835443038,
"cat_f1_NoneOthe": 0.2672811059907834,
"cat_prec_NoneOthe": 0.3815789473684211,
"cat_recall_NoneOthe": 0.20567375886524822,
"cat_f1_RiskMana": 0.41630901287553645,
"cat_prec_RiskMana": 0.383399209486166,
"cat_recall_RiskMana": 0.45539906103286387,
"cat_f1_Strategy": 0.6909090909090909,
"cat_prec_Strategy": 0.8471337579617835,
"cat_recall_Strategy": 0.5833333333333334,
"cat_f1_Third-Pa": 0.6351351351351351,
"cat_prec_Third-Pa": 0.5090252707581228,
"cat_recall_Third-Pa": 0.844311377245509,
"cat_kripp_alpha": 0.49046948704650417,
"spec_macro_f1": 0.6345038647761864,
"spec_weighted_f1": 0.6901912617666649,
"spec_macro_precision": 0.7050601461353045,
"spec_macro_recall": 0.6128856912762208,
"spec_mcc": 0.5373481008745777,
"spec_auc": 0.7435001662825611,
"spec_ece": 0.29666666666666663,
"spec_confusion_matrix": [
[
542,
33,
3,
27
],
[
66,
73,
1,
5
],
[
121,
26,
108,
5
],
[
35,
22,
12,
121
]
],
"spec_f1_L1Generi": 0.7918188458729,
"spec_prec_L1Generi": 0.7094240837696335,
"spec_recall_L1Generi": 0.8958677685950414,
"spec_f1_L2Domain": 0.4882943143812709,
"spec_prec_L2Domain": 0.474025974025974,
"spec_recall_L2Domain": 0.503448275862069,
"spec_f1_L3Firm-S": 0.5625,
"spec_prec_L3Firm-S": 0.8709677419354839,
"spec_recall_L3Firm-S": 0.4153846153846154,
"spec_f1_L4Quanti": 0.6954022988505747,
"spec_prec_L4Quanti": 0.7658227848101266,
"spec_recall_L4Quanti": 0.6368421052631579,
"spec_qwk": 0.5875343721356554,
"spec_mae": 0.5258333333333334,
"spec_kripp_alpha": 0.562049085880076,
"num_samples": 1200,
"total_time_s": 0.0,
"avg_ms_per_sample": 0.001,
"combined_macro_f1": 0.5874823341732938
}
}