298 lines
6.6 KiB
JSON
298 lines
6.6 KiB
JSON
{
|
|
"iter1-clspool_vs_GPT-5.4": {
|
|
"cat_macro_f1": 0.9296272782528762,
|
|
"cat_weighted_f1": 0.9306824376807155,
|
|
"cat_macro_precision": 0.9289887550616817,
|
|
"cat_macro_recall": 0.9334375025997984,
|
|
"cat_mcc": 0.9179226636085169,
|
|
"cat_auc": 0.9911299127522846,
|
|
"cat_ece": 0.05557066917419438,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
217,
|
|
0,
|
|
8,
|
|
3,
|
|
2,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
83,
|
|
0,
|
|
2,
|
|
2,
|
|
1,
|
|
0
|
|
],
|
|
[
|
|
2,
|
|
0,
|
|
144,
|
|
1,
|
|
3,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
1,
|
|
0,
|
|
2,
|
|
132,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
6,
|
|
1,
|
|
5,
|
|
17,
|
|
167,
|
|
1,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
2,
|
|
1,
|
|
8,
|
|
2,
|
|
208,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
1,
|
|
11,
|
|
0,
|
|
165
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.9517543859649122,
|
|
"cat_prec_BoardGov": 0.9601769911504425,
|
|
"cat_recall_BoardGov": 0.9434782608695652,
|
|
"cat_f1_Incident": 0.9540229885057471,
|
|
"cat_prec_Incident": 0.9651162790697675,
|
|
"cat_recall_Incident": 0.9431818181818182,
|
|
"cat_f1_Manageme": 0.9290322580645162,
|
|
"cat_prec_Manageme": 0.9,
|
|
"cat_recall_Manageme": 0.96,
|
|
"cat_f1_NoneOthe": 0.88,
|
|
"cat_prec_NoneOthe": 0.8048780487804879,
|
|
"cat_recall_NoneOthe": 0.9705882352941176,
|
|
"cat_f1_RiskMana": 0.8652849740932642,
|
|
"cat_prec_RiskMana": 0.8882978723404256,
|
|
"cat_recall_RiskMana": 0.8434343434343434,
|
|
"cat_f1_Strategy": 0.9651972157772621,
|
|
"cat_prec_Strategy": 0.9904761904761905,
|
|
"cat_recall_Strategy": 0.9411764705882353,
|
|
"cat_f1_Third-Pa": 0.9620991253644315,
|
|
"cat_prec_Third-Pa": 0.9939759036144579,
|
|
"cat_recall_Third-Pa": 0.9322033898305084,
|
|
"cat_kripp_alpha": 0.9174669822467758,
|
|
"spec_macro_f1": 0.892010224838834,
|
|
"spec_weighted_f1": 0.9098424770121019,
|
|
"spec_macro_precision": 0.9042493173083448,
|
|
"spec_macro_recall": 0.8836163792237031,
|
|
"spec_mcc": 0.8634241541671751,
|
|
"spec_auc": 0.9777836963763646,
|
|
"spec_ece": 0.07659540871779125,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
587,
|
|
11,
|
|
17,
|
|
3
|
|
],
|
|
[
|
|
32,
|
|
125,
|
|
9,
|
|
2
|
|
],
|
|
[
|
|
14,
|
|
4,
|
|
187,
|
|
2
|
|
],
|
|
[
|
|
3,
|
|
1,
|
|
9,
|
|
194
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.9362041467304625,
|
|
"spec_prec_L1Generi": 0.9229559748427673,
|
|
"spec_recall_L1Generi": 0.9498381877022654,
|
|
"spec_f1_L2Domain": 0.8090614886731392,
|
|
"spec_prec_L2Domain": 0.8865248226950354,
|
|
"spec_recall_L2Domain": 0.7440476190476191,
|
|
"spec_f1_L3Firm-S": 0.8717948717948718,
|
|
"spec_prec_L3Firm-S": 0.8423423423423423,
|
|
"spec_recall_L3Firm-S": 0.9033816425120773,
|
|
"spec_f1_L4Quanti": 0.9509803921568627,
|
|
"spec_prec_L4Quanti": 0.9651741293532339,
|
|
"spec_recall_L4Quanti": 0.9371980676328503,
|
|
"spec_qwk": 0.9224750079938221,
|
|
"spec_mae": 0.1275,
|
|
"spec_kripp_alpha": 0.9099809044589873,
|
|
"total_time_s": 6.83874113188358,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 5.698950943236317,
|
|
"combined_macro_f1": 0.910818751545855
|
|
},
|
|
"iter1-clspool_vs_Opus-4.6": {
|
|
"cat_macro_f1": 0.9228949790380195,
|
|
"cat_weighted_f1": 0.9228190044594041,
|
|
"cat_macro_precision": 0.9183239817151002,
|
|
"cat_macro_recall": 0.9310538134995027,
|
|
"cat_mcc": 0.9101930161599978,
|
|
"cat_auc": 0.9924519781241848,
|
|
"cat_ece": 0.06223733584086104,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
208,
|
|
0,
|
|
3,
|
|
3,
|
|
0,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
76,
|
|
0,
|
|
1,
|
|
2,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
5,
|
|
0,
|
|
147,
|
|
1,
|
|
4,
|
|
0,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
139,
|
|
2,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
12,
|
|
1,
|
|
9,
|
|
14,
|
|
171,
|
|
1,
|
|
5
|
|
],
|
|
[
|
|
1,
|
|
9,
|
|
1,
|
|
6,
|
|
2,
|
|
208,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
7,
|
|
1,
|
|
159
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.9454545454545454,
|
|
"cat_prec_BoardGov": 0.9203539823008849,
|
|
"cat_recall_BoardGov": 0.9719626168224299,
|
|
"cat_f1_Incident": 0.9212121212121213,
|
|
"cat_prec_Incident": 0.8837209302325582,
|
|
"cat_recall_Incident": 0.9620253164556962,
|
|
"cat_f1_Manageme": 0.9245283018867925,
|
|
"cat_prec_Manageme": 0.91875,
|
|
"cat_recall_Manageme": 0.930379746835443,
|
|
"cat_f1_NoneOthe": 0.9114754098360656,
|
|
"cat_prec_NoneOthe": 0.8475609756097561,
|
|
"cat_recall_NoneOthe": 0.9858156028368794,
|
|
"cat_f1_RiskMana": 0.8528678304239401,
|
|
"cat_prec_RiskMana": 0.9095744680851063,
|
|
"cat_recall_RiskMana": 0.8028169014084507,
|
|
"cat_f1_Strategy": 0.9497716894977168,
|
|
"cat_prec_Strategy": 0.9904761904761905,
|
|
"cat_recall_Strategy": 0.9122807017543859,
|
|
"cat_f1_Third-Pa": 0.954954954954955,
|
|
"cat_prec_Third-Pa": 0.9578313253012049,
|
|
"cat_recall_Third-Pa": 0.9520958083832335,
|
|
"cat_kripp_alpha": 0.9095735484151157,
|
|
"spec_macro_f1": 0.8804386286358235,
|
|
"spec_weighted_f1": 0.8975676999782217,
|
|
"spec_macro_precision": 0.8892226854649037,
|
|
"spec_macro_recall": 0.8750457181821643,
|
|
"spec_mcc": 0.8465565454059848,
|
|
"spec_auc": 0.9697722386763277,
|
|
"spec_ece": 0.08741456707318629,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
575,
|
|
19,
|
|
10,
|
|
1
|
|
],
|
|
[
|
|
26,
|
|
114,
|
|
4,
|
|
1
|
|
],
|
|
[
|
|
35,
|
|
8,
|
|
204,
|
|
13
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
4,
|
|
186
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.9266720386784851,
|
|
"spec_prec_L1Generi": 0.9040880503144654,
|
|
"spec_recall_L1Generi": 0.9504132231404959,
|
|
"spec_f1_L2Domain": 0.7972027972027972,
|
|
"spec_prec_L2Domain": 0.8085106382978723,
|
|
"spec_recall_L2Domain": 0.7862068965517242,
|
|
"spec_f1_L3Firm-S": 0.8464730290456431,
|
|
"spec_prec_L3Firm-S": 0.918918918918919,
|
|
"spec_recall_L3Firm-S": 0.7846153846153846,
|
|
"spec_f1_L4Quanti": 0.9514066496163683,
|
|
"spec_prec_L4Quanti": 0.9253731343283582,
|
|
"spec_recall_L4Quanti": 0.9789473684210527,
|
|
"spec_qwk": 0.9187882106031572,
|
|
"spec_mae": 0.14083333333333334,
|
|
"spec_kripp_alpha": 0.9041056117796359,
|
|
"total_time_s": 6.83874113188358,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 5.698950943236317,
|
|
"combined_macro_f1": 0.9016668038369215
|
|
}
|
|
} |