2026-04-07 00:51:48 -04:00

298 lines
6.6 KiB
JSON

{
"iter1-clspool_vs_GPT-5.4": {
"cat_macro_f1": 0.9296272782528762,
"cat_weighted_f1": 0.9306824376807155,
"cat_macro_precision": 0.9289887550616817,
"cat_macro_recall": 0.9334375025997984,
"cat_mcc": 0.9179226636085169,
"cat_auc": 0.9911299127522846,
"cat_ece": 0.05557066917419438,
"cat_confusion_matrix": [
[
217,
0,
8,
3,
2,
0,
0
],
[
0,
83,
0,
2,
2,
1,
0
],
[
2,
0,
144,
1,
3,
0,
0
],
[
1,
0,
2,
132,
1,
0,
0
],
[
6,
1,
5,
17,
167,
1,
1
],
[
0,
2,
1,
8,
2,
208,
0
],
[
0,
0,
0,
1,
11,
0,
165
]
],
"cat_f1_BoardGov": 0.9517543859649122,
"cat_prec_BoardGov": 0.9601769911504425,
"cat_recall_BoardGov": 0.9434782608695652,
"cat_f1_Incident": 0.9540229885057471,
"cat_prec_Incident": 0.9651162790697675,
"cat_recall_Incident": 0.9431818181818182,
"cat_f1_Manageme": 0.9290322580645162,
"cat_prec_Manageme": 0.9,
"cat_recall_Manageme": 0.96,
"cat_f1_NoneOthe": 0.88,
"cat_prec_NoneOthe": 0.8048780487804879,
"cat_recall_NoneOthe": 0.9705882352941176,
"cat_f1_RiskMana": 0.8652849740932642,
"cat_prec_RiskMana": 0.8882978723404256,
"cat_recall_RiskMana": 0.8434343434343434,
"cat_f1_Strategy": 0.9651972157772621,
"cat_prec_Strategy": 0.9904761904761905,
"cat_recall_Strategy": 0.9411764705882353,
"cat_f1_Third-Pa": 0.9620991253644315,
"cat_prec_Third-Pa": 0.9939759036144579,
"cat_recall_Third-Pa": 0.9322033898305084,
"cat_kripp_alpha": 0.9174669822467758,
"spec_macro_f1": 0.892010224838834,
"spec_weighted_f1": 0.9098424770121019,
"spec_macro_precision": 0.9042493173083448,
"spec_macro_recall": 0.8836163792237031,
"spec_mcc": 0.8634241541671751,
"spec_auc": 0.9777836963763646,
"spec_ece": 0.07659540871779125,
"spec_confusion_matrix": [
[
587,
11,
17,
3
],
[
32,
125,
9,
2
],
[
14,
4,
187,
2
],
[
3,
1,
9,
194
]
],
"spec_f1_L1Generi": 0.9362041467304625,
"spec_prec_L1Generi": 0.9229559748427673,
"spec_recall_L1Generi": 0.9498381877022654,
"spec_f1_L2Domain": 0.8090614886731392,
"spec_prec_L2Domain": 0.8865248226950354,
"spec_recall_L2Domain": 0.7440476190476191,
"spec_f1_L3Firm-S": 0.8717948717948718,
"spec_prec_L3Firm-S": 0.8423423423423423,
"spec_recall_L3Firm-S": 0.9033816425120773,
"spec_f1_L4Quanti": 0.9509803921568627,
"spec_prec_L4Quanti": 0.9651741293532339,
"spec_recall_L4Quanti": 0.9371980676328503,
"spec_qwk": 0.9224750079938221,
"spec_mae": 0.1275,
"spec_kripp_alpha": 0.9099809044589873,
"total_time_s": 6.83874113188358,
"num_samples": 1200,
"avg_ms_per_sample": 5.698950943236317,
"combined_macro_f1": 0.910818751545855
},
"iter1-clspool_vs_Opus-4.6": {
"cat_macro_f1": 0.9228949790380195,
"cat_weighted_f1": 0.9228190044594041,
"cat_macro_precision": 0.9183239817151002,
"cat_macro_recall": 0.9310538134995027,
"cat_mcc": 0.9101930161599978,
"cat_auc": 0.9924519781241848,
"cat_ece": 0.06223733584086104,
"cat_confusion_matrix": [
[
208,
0,
3,
3,
0,
0,
0
],
[
0,
76,
0,
1,
2,
0,
0
],
[
5,
0,
147,
1,
4,
0,
1
],
[
0,
0,
0,
139,
2,
0,
0
],
[
12,
1,
9,
14,
171,
1,
5
],
[
1,
9,
1,
6,
2,
208,
1
],
[
0,
0,
0,
0,
7,
1,
159
]
],
"cat_f1_BoardGov": 0.9454545454545454,
"cat_prec_BoardGov": 0.9203539823008849,
"cat_recall_BoardGov": 0.9719626168224299,
"cat_f1_Incident": 0.9212121212121213,
"cat_prec_Incident": 0.8837209302325582,
"cat_recall_Incident": 0.9620253164556962,
"cat_f1_Manageme": 0.9245283018867925,
"cat_prec_Manageme": 0.91875,
"cat_recall_Manageme": 0.930379746835443,
"cat_f1_NoneOthe": 0.9114754098360656,
"cat_prec_NoneOthe": 0.8475609756097561,
"cat_recall_NoneOthe": 0.9858156028368794,
"cat_f1_RiskMana": 0.8528678304239401,
"cat_prec_RiskMana": 0.9095744680851063,
"cat_recall_RiskMana": 0.8028169014084507,
"cat_f1_Strategy": 0.9497716894977168,
"cat_prec_Strategy": 0.9904761904761905,
"cat_recall_Strategy": 0.9122807017543859,
"cat_f1_Third-Pa": 0.954954954954955,
"cat_prec_Third-Pa": 0.9578313253012049,
"cat_recall_Third-Pa": 0.9520958083832335,
"cat_kripp_alpha": 0.9095735484151157,
"spec_macro_f1": 0.8804386286358235,
"spec_weighted_f1": 0.8975676999782217,
"spec_macro_precision": 0.8892226854649037,
"spec_macro_recall": 0.8750457181821643,
"spec_mcc": 0.8465565454059848,
"spec_auc": 0.9697722386763277,
"spec_ece": 0.08741456707318629,
"spec_confusion_matrix": [
[
575,
19,
10,
1
],
[
26,
114,
4,
1
],
[
35,
8,
204,
13
],
[
0,
0,
4,
186
]
],
"spec_f1_L1Generi": 0.9266720386784851,
"spec_prec_L1Generi": 0.9040880503144654,
"spec_recall_L1Generi": 0.9504132231404959,
"spec_f1_L2Domain": 0.7972027972027972,
"spec_prec_L2Domain": 0.8085106382978723,
"spec_recall_L2Domain": 0.7862068965517242,
"spec_f1_L3Firm-S": 0.8464730290456431,
"spec_prec_L3Firm-S": 0.918918918918919,
"spec_recall_L3Firm-S": 0.7846153846153846,
"spec_f1_L4Quanti": 0.9514066496163683,
"spec_prec_L4Quanti": 0.9253731343283582,
"spec_recall_L4Quanti": 0.9789473684210527,
"spec_qwk": 0.9187882106031572,
"spec_mae": 0.14083333333333334,
"spec_kripp_alpha": 0.9041056117796359,
"total_time_s": 6.83874113188358,
"num_samples": 1200,
"avg_ms_per_sample": 5.698950943236317,
"combined_macro_f1": 0.9016668038369215
}
}