298 lines
6.6 KiB
JSON
298 lines
6.6 KiB
JSON
{
|
|
"ensemble-3seed_vs_GPT-5.4": {
|
|
"cat_macro_f1": 0.9382530391727061,
|
|
"cat_weighted_f1": 0.9385858996685268,
|
|
"cat_macro_precision": 0.937038491784886,
|
|
"cat_macro_recall": 0.9417984783962936,
|
|
"cat_mcc": 0.9275970467019695,
|
|
"cat_auc": 0.9930606345789074,
|
|
"cat_ece": 0.05087702547510463,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
225,
|
|
0,
|
|
3,
|
|
0,
|
|
2,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
85,
|
|
0,
|
|
0,
|
|
2,
|
|
1,
|
|
0
|
|
],
|
|
[
|
|
2,
|
|
0,
|
|
145,
|
|
1,
|
|
2,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
3,
|
|
132,
|
|
0,
|
|
1,
|
|
0
|
|
],
|
|
[
|
|
6,
|
|
1,
|
|
4,
|
|
18,
|
|
167,
|
|
1,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
2,
|
|
1,
|
|
8,
|
|
2,
|
|
208,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
13,
|
|
0,
|
|
164
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.9719222462203023,
|
|
"cat_prec_BoardGov": 0.9656652360515021,
|
|
"cat_recall_BoardGov": 0.9782608695652174,
|
|
"cat_f1_Incident": 0.9659090909090909,
|
|
"cat_prec_Incident": 0.9659090909090909,
|
|
"cat_recall_Incident": 0.9659090909090909,
|
|
"cat_f1_Manageme": 0.9477124183006536,
|
|
"cat_prec_Manageme": 0.9294871794871795,
|
|
"cat_recall_Manageme": 0.9666666666666667,
|
|
"cat_f1_NoneOthe": 0.8949152542372881,
|
|
"cat_prec_NoneOthe": 0.8301886792452831,
|
|
"cat_recall_NoneOthe": 0.9705882352941176,
|
|
"cat_f1_RiskMana": 0.8652849740932642,
|
|
"cat_prec_RiskMana": 0.8882978723404256,
|
|
"cat_recall_RiskMana": 0.8434343434343434,
|
|
"cat_f1_Strategy": 0.9629629629629629,
|
|
"cat_prec_Strategy": 0.985781990521327,
|
|
"cat_recall_Strategy": 0.9411764705882353,
|
|
"cat_f1_Third-Pa": 0.9590643274853801,
|
|
"cat_prec_Third-Pa": 0.9939393939393939,
|
|
"cat_recall_Third-Pa": 0.9265536723163842,
|
|
"cat_kripp_alpha": 0.9272644584249223,
|
|
"spec_macro_f1": 0.902152688639083,
|
|
"spec_weighted_f1": 0.9177972939099285,
|
|
"spec_macro_precision": 0.9070378979232232,
|
|
"spec_macro_recall": 0.8991005681856252,
|
|
"spec_mcc": 0.8753613597836426,
|
|
"spec_auc": 0.9826044267990239,
|
|
"spec_ece": 0.06921947295467064,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
583,
|
|
17,
|
|
15,
|
|
3
|
|
],
|
|
[
|
|
28,
|
|
130,
|
|
9,
|
|
1
|
|
],
|
|
[
|
|
10,
|
|
3,
|
|
192,
|
|
2
|
|
],
|
|
[
|
|
2,
|
|
1,
|
|
7,
|
|
197
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.9395648670427075,
|
|
"spec_prec_L1Generi": 0.9357945425361156,
|
|
"spec_recall_L1Generi": 0.9433656957928802,
|
|
"spec_f1_L2Domain": 0.8150470219435737,
|
|
"spec_prec_L2Domain": 0.8609271523178808,
|
|
"spec_recall_L2Domain": 0.7738095238095238,
|
|
"spec_f1_L3Firm-S": 0.8930232558139535,
|
|
"spec_prec_L3Firm-S": 0.8609865470852018,
|
|
"spec_recall_L3Firm-S": 0.927536231884058,
|
|
"spec_f1_L4Quanti": 0.9609756097560975,
|
|
"spec_prec_L4Quanti": 0.9704433497536946,
|
|
"spec_recall_L4Quanti": 0.9516908212560387,
|
|
"spec_qwk": 0.9338562415243872,
|
|
"spec_mae": 0.1125,
|
|
"spec_kripp_alpha": 0.9206308343112934,
|
|
"total_time_s": 19.849480003875215,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 16.54123333656268,
|
|
"combined_macro_f1": 0.9202028639058946
|
|
},
|
|
"ensemble-3seed_vs_Opus-4.6": {
|
|
"cat_macro_f1": 0.9287535853888995,
|
|
"cat_weighted_f1": 0.9277067129478959,
|
|
"cat_macro_precision": 0.9242877868683518,
|
|
"cat_macro_recall": 0.9368327500295983,
|
|
"cat_mcc": 0.9160728021840298,
|
|
"cat_auc": 0.9947981532709612,
|
|
"cat_ece": 0.06293055539329852,
|
|
"cat_confusion_matrix": [
|
|
[
|
|
211,
|
|
0,
|
|
1,
|
|
1,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
78,
|
|
0,
|
|
0,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
8,
|
|
0,
|
|
145,
|
|
1,
|
|
3,
|
|
0,
|
|
1
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
1,
|
|
139,
|
|
1,
|
|
0,
|
|
0
|
|
],
|
|
[
|
|
13,
|
|
0,
|
|
8,
|
|
13,
|
|
173,
|
|
1,
|
|
5
|
|
],
|
|
[
|
|
1,
|
|
10,
|
|
1,
|
|
4,
|
|
3,
|
|
209,
|
|
0
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
0,
|
|
1,
|
|
6,
|
|
1,
|
|
159
|
|
]
|
|
],
|
|
"cat_f1_BoardGov": 0.9440715883668904,
|
|
"cat_prec_BoardGov": 0.9055793991416309,
|
|
"cat_recall_BoardGov": 0.985981308411215,
|
|
"cat_f1_Incident": 0.9341317365269461,
|
|
"cat_prec_Incident": 0.8863636363636364,
|
|
"cat_recall_Incident": 0.9873417721518988,
|
|
"cat_f1_Manageme": 0.9235668789808917,
|
|
"cat_prec_Manageme": 0.9294871794871795,
|
|
"cat_recall_Manageme": 0.9177215189873418,
|
|
"cat_f1_NoneOthe": 0.9266666666666666,
|
|
"cat_prec_NoneOthe": 0.8742138364779874,
|
|
"cat_recall_NoneOthe": 0.9858156028368794,
|
|
"cat_f1_RiskMana": 0.8628428927680798,
|
|
"cat_prec_RiskMana": 0.9202127659574468,
|
|
"cat_recall_RiskMana": 0.812206572769953,
|
|
"cat_f1_Strategy": 0.9521640091116174,
|
|
"cat_prec_Strategy": 0.990521327014218,
|
|
"cat_recall_Strategy": 0.9166666666666666,
|
|
"cat_f1_Third-Pa": 0.9578313253012049,
|
|
"cat_prec_Third-Pa": 0.9636363636363636,
|
|
"cat_recall_Third-Pa": 0.9520958083832335,
|
|
"cat_kripp_alpha": 0.9154443888884335,
|
|
"spec_macro_f1": 0.8852876459236954,
|
|
"spec_weighted_f1": 0.9023972621736004,
|
|
"spec_macro_precision": 0.888087338599951,
|
|
"spec_macro_recall": 0.8858055716763026,
|
|
"spec_mcc": 0.8535145242291756,
|
|
"spec_auc": 0.9775733710374438,
|
|
"spec_ece": 0.08450941021243728,
|
|
"spec_confusion_matrix": [
|
|
[
|
|
571,
|
|
24,
|
|
9,
|
|
1
|
|
],
|
|
[
|
|
21,
|
|
118,
|
|
5,
|
|
1
|
|
],
|
|
[
|
|
31,
|
|
9,
|
|
207,
|
|
13
|
|
],
|
|
[
|
|
0,
|
|
0,
|
|
2,
|
|
188
|
|
]
|
|
],
|
|
"spec_f1_L1Generi": 0.9299674267100977,
|
|
"spec_prec_L1Generi": 0.9165329052969502,
|
|
"spec_recall_L1Generi": 0.943801652892562,
|
|
"spec_f1_L2Domain": 0.7972972972972973,
|
|
"spec_prec_L2Domain": 0.7814569536423841,
|
|
"spec_recall_L2Domain": 0.8137931034482758,
|
|
"spec_f1_L3Firm-S": 0.8571428571428571,
|
|
"spec_prec_L3Firm-S": 0.9282511210762332,
|
|
"spec_recall_L3Firm-S": 0.7961538461538461,
|
|
"spec_f1_L4Quanti": 0.9567430025445293,
|
|
"spec_prec_L4Quanti": 0.9261083743842364,
|
|
"spec_recall_L4Quanti": 0.9894736842105263,
|
|
"spec_qwk": 0.9247559136673115,
|
|
"spec_mae": 0.1325,
|
|
"spec_kripp_alpha": 0.910971486983108,
|
|
"total_time_s": 19.849480003875215,
|
|
"num_samples": 1200,
|
|
"avg_ms_per_sample": 16.54123333656268,
|
|
"combined_macro_f1": 0.9070206156562974
|
|
}
|
|
} |