298 lines
6.6 KiB
JSON
298 lines
6.6 KiB
JSON
{
|
|
"dictionary_vs_GPT-5.4": {
|
|
"cat_macro_f1": 0.5562709796995989,
|
|
"cat_weighted_f1": 0.586654770315343,
|
|
"cat_macro_precision": 0.5820642365150382,
|
|
"cat_macro_recall": 0.559253048500957,
|
|
"cat_mcc": 0.5159948841699565,
|
|
"cat_auc": 0.7450329775506974,
|
|
"cat_ece": 0.4141666666666667,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
177,
|
|
1,
|
|
23,
|
|
3,
|
|
19,
|
|
1,
|
|
6
|
|
],
|
|
[
|
|
1,
|
|
41,
|
|
2,
|
|
8,
|
|
16,
|
|
10,
|
|
10
|
|
],
|
|
[
|
|
13,
|
|
2,
|
|
83,
|
|
3,
|
|
40,
|
|
1,
|
|
8
|
|
],
|
|
[
|
|
3,
|
|
27,
|
|
0,
|
|
33,
|
|
44,
|
|
14,
|
|
15
|
|
],
|
|
[
|
|
15,
|
|
12,
|
|
11,
|
|
7,
|
|
94,
|
|
0,
|
|
59
|
|
],
|
|
[
|
|
1,
|
|
20,
|
|
0,
|
|
4,
|
|
34,
|
|
129,
|
|
33
|
|
],
|
|
[
|
|
0,
|
|
5,
|
|
0,
|
|
18,
|
|
6,
|
|
2,
|
|
146
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.8045454545454546,
|
|
"cat_prec_BoardGov": 0.8428571428571429,
|
|
"cat_recall_BoardGov": 0.7695652173913043,
|
|
"cat_f1_Incident": 0.41836734693877553,
|
|
"cat_prec_Incident": 0.37962962962962965,
|
|
"cat_recall_Incident": 0.4659090909090909,
|
|
"cat_f1_Manageme": 0.6171003717472119,
|
|
"cat_prec_Manageme": 0.6974789915966386,
|
|
"cat_recall_Manageme": 0.5533333333333333,
|
|
"cat_f1_NoneOthe": 0.3113207547169811,
|
|
"cat_prec_NoneOthe": 0.4342105263157895,
|
|
"cat_recall_NoneOthe": 0.2426470588235294,
|
|
"cat_f1_RiskMana": 0.41685144124168516,
|
|
"cat_prec_RiskMana": 0.3715415019762846,
|
|
"cat_recall_RiskMana": 0.47474747474747475,
|
|
"cat_f1_Strategy": 0.6825396825396826,
|
|
"cat_prec_Strategy": 0.821656050955414,
|
|
"cat_recall_Strategy": 0.583710407239819,
|
|
"cat_f1_Third-Pa": 0.6431718061674009,
|
|
"cat_prec_Third-Pa": 0.5270758122743683,
|
|
"cat_recall_Third-Pa": 0.8248587570621468,
|
|
"cat_kripp_alpha": 0.509166416578055,
|
|
"spec_macro_f1": 0.6554577856007078,
|
|
"spec_weighted_f1": 0.709500413776473,
|
|
"spec_macro_precision": 0.7204439491998363,
|
|
"spec_macro_recall": 0.6226176238048335,
|
|
"spec_mcc": 0.5554600287825188,
|
|
"spec_auc": 0.7506681772561045,
|
|
"spec_ece": 0.28,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
554,
|
|
27,
|
|
4,
|
|
33
|
|
],
|
|
[
|
|
75,
|
|
86,
|
|
2,
|
|
5
|
|
],
|
|
[
|
|
87,
|
|
16,
|
|
104,
|
|
0
|
|
],
|
|
[
|
|
48,
|
|
25,
|
|
14,
|
|
120
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.8017366136034733,
|
|
"spec_prec_L1Generi": 0.725130890052356,
|
|
"spec_recall_L1Generi": 0.8964401294498382,
|
|
"spec_f1_L2Domain": 0.5341614906832298,
|
|
"spec_prec_L2Domain": 0.5584415584415584,
|
|
"spec_recall_L2Domain": 0.5119047619047619,
|
|
"spec_f1_L3Firm-S": 0.6283987915407855,
|
|
"spec_prec_L3Firm-S": 0.8387096774193549,
|
|
"spec_recall_L3Firm-S": 0.5024154589371981,
|
|
"spec_f1_L4Quanti": 0.6575342465753424,
|
|
"spec_prec_L4Quanti": 0.759493670886076,
|
|
"spec_recall_L4Quanti": 0.5797101449275363,
|
|
"spec_qwk": 0.5756972488045813,
|
|
"spec_mae": 0.5158333333333334,
|
|
"spec_kripp_alpha": 0.559449580800123,
|
|
"num_samples": 1200,
|
|
"total_time_s": 0.0,
|
|
"avg_ms_per_sample": 0.001,
|
|
"combined_macro_f1": 0.6058643826501533
|
|
},
|
|
"dictionary_vs_Opus-4.6": {
|
|
"cat_macro_f1": 0.5404608035704013,
|
|
"cat_weighted_f1": 0.5680942824830456,
|
|
"cat_macro_precision": 0.564206294840196,
|
|
"cat_macro_recall": 0.5502937128850568,
|
|
"cat_mcc": 0.49808632770596933,
|
|
"cat_auc": 0.7391875463755565,
|
|
"cat_ece": 0.43000000000000005,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
162,
|
|
1,
|
|
22,
|
|
3,
|
|
21,
|
|
1,
|
|
4
|
|
],
|
|
[
|
|
1,
|
|
37,
|
|
2,
|
|
8,
|
|
16,
|
|
6,
|
|
9
|
|
],
|
|
[
|
|
20,
|
|
1,
|
|
85,
|
|
6,
|
|
37,
|
|
1,
|
|
8
|
|
],
|
|
[
|
|
3,
|
|
32,
|
|
0,
|
|
29,
|
|
46,
|
|
14,
|
|
17
|
|
],
|
|
[
|
|
22,
|
|
12,
|
|
10,
|
|
7,
|
|
97,
|
|
0,
|
|
65
|
|
],
|
|
[
|
|
2,
|
|
21,
|
|
0,
|
|
5,
|
|
34,
|
|
133,
|
|
33
|
|
],
|
|
[
|
|
0,
|
|
4,
|
|
0,
|
|
18,
|
|
2,
|
|
2,
|
|
141
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.7641509433962265,
|
|
"cat_prec_BoardGov": 0.7714285714285715,
|
|
"cat_recall_BoardGov": 0.7570093457943925,
|
|
"cat_f1_Incident": 0.39572192513368987,
|
|
"cat_prec_Incident": 0.3425925925925926,
|
|
"cat_recall_Incident": 0.46835443037974683,
|
|
"cat_f1_Manageme": 0.6137184115523465,
|
|
"cat_prec_Manageme": 0.7142857142857143,
|
|
"cat_recall_Manageme": 0.5379746835443038,
|
|
"cat_f1_NoneOthe": 0.2672811059907834,
|
|
"cat_prec_NoneOthe": 0.3815789473684211,
|
|
"cat_recall_NoneOthe": 0.20567375886524822,
|
|
"cat_f1_RiskMana": 0.41630901287553645,
|
|
"cat_prec_RiskMana": 0.383399209486166,
|
|
"cat_recall_RiskMana": 0.45539906103286387,
|
|
"cat_f1_Strategy": 0.6909090909090909,
|
|
"cat_prec_Strategy": 0.8471337579617835,
|
|
"cat_recall_Strategy": 0.5833333333333334,
|
|
"cat_f1_Third-Pa": 0.6351351351351351,
|
|
"cat_prec_Third-Pa": 0.5090252707581228,
|
|
"cat_recall_Third-Pa": 0.844311377245509,
|
|
"cat_kripp_alpha": 0.49046948704650417,
|
|
"spec_macro_f1": 0.6345038647761864,
|
|
"spec_weighted_f1": 0.6901912617666649,
|
|
"spec_macro_precision": 0.7050601461353045,
|
|
"spec_macro_recall": 0.6128856912762208,
|
|
"spec_mcc": 0.5373481008745777,
|
|
"spec_auc": 0.7435001662825611,
|
|
"spec_ece": 0.29666666666666663,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
542,
|
|
33,
|
|
3,
|
|
27
|
|
],
|
|
[
|
|
66,
|
|
73,
|
|
1,
|
|
5
|
|
],
|
|
[
|
|
121,
|
|
26,
|
|
108,
|
|
5
|
|
],
|
|
[
|
|
35,
|
|
22,
|
|
12,
|
|
121
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.7918188458729,
|
|
"spec_prec_L1Generi": 0.7094240837696335,
|
|
"spec_recall_L1Generi": 0.8958677685950414,
|
|
"spec_f1_L2Domain": 0.4882943143812709,
|
|
"spec_prec_L2Domain": 0.474025974025974,
|
|
"spec_recall_L2Domain": 0.503448275862069,
|
|
"spec_f1_L3Firm-S": 0.5625,
|
|
"spec_prec_L3Firm-S": 0.8709677419354839,
|
|
"spec_recall_L3Firm-S": 0.4153846153846154,
|
|
"spec_f1_L4Quanti": 0.6954022988505747,
|
|
"spec_prec_L4Quanti": 0.7658227848101266,
|
|
"spec_recall_L4Quanti": 0.6368421052631579,
|
|
"spec_qwk": 0.5875343721356554,
|
|
"spec_mae": 0.5258333333333334,
|
|
"spec_kripp_alpha": 0.562049085880076,
|
|
"num_samples": 1200,
|
|
"total_time_s": 0.0,
|
|
"avg_ms_per_sample": 0.001,
|
|
"combined_macro_f1": 0.5874823341732938
|
|
}
|
|
} |