298 lines
6.6 KiB
JSON

{
"ensemble-3seed_vs_GPT-5.4": {
"cat_macro_f1": 0.9382530391727061,
"cat_weighted_f1": 0.9385858996685268,
"cat_macro_precision": 0.937038491784886,
"cat_macro_recall": 0.9417984783962936,
"cat_mcc": 0.9275970467019695,
"cat_auc": 0.9930606345789074,
"cat_ece": 0.05087702547510463,
"cat_confusion_matrix": [
[
225,
0,
3,
0,
2,
0,
0
],
[
0,
85,
0,
0,
2,
1,
0
],
[
2,
0,
145,
1,
2,
0,
0
],
[
0,
0,
3,
132,
0,
1,
0
],
[
6,
1,
4,
18,
167,
1,
1
],
[
0,
2,
1,
8,
2,
208,
0
],
[
0,
0,
0,
0,
13,
0,
164
]
],
"cat_f1_BoardGov": 0.9719222462203023,
"cat_prec_BoardGov": 0.9656652360515021,
"cat_recall_BoardGov": 0.9782608695652174,
"cat_f1_Incident": 0.9659090909090909,
"cat_prec_Incident": 0.9659090909090909,
"cat_recall_Incident": 0.9659090909090909,
"cat_f1_Manageme": 0.9477124183006536,
"cat_prec_Manageme": 0.9294871794871795,
"cat_recall_Manageme": 0.9666666666666667,
"cat_f1_NoneOthe": 0.8949152542372881,
"cat_prec_NoneOthe": 0.8301886792452831,
"cat_recall_NoneOthe": 0.9705882352941176,
"cat_f1_RiskMana": 0.8652849740932642,
"cat_prec_RiskMana": 0.8882978723404256,
"cat_recall_RiskMana": 0.8434343434343434,
"cat_f1_Strategy": 0.9629629629629629,
"cat_prec_Strategy": 0.985781990521327,
"cat_recall_Strategy": 0.9411764705882353,
"cat_f1_Third-Pa": 0.9590643274853801,
"cat_prec_Third-Pa": 0.9939393939393939,
"cat_recall_Third-Pa": 0.9265536723163842,
"cat_kripp_alpha": 0.9272644584249223,
"spec_macro_f1": 0.902152688639083,
"spec_weighted_f1": 0.9177972939099285,
"spec_macro_precision": 0.9070378979232232,
"spec_macro_recall": 0.8991005681856252,
"spec_mcc": 0.8753613597836426,
"spec_auc": 0.9826044267990239,
"spec_ece": 0.06921947295467064,
"spec_confusion_matrix": [
[
583,
17,
15,
3
],
[
28,
130,
9,
1
],
[
10,
3,
192,
2
],
[
2,
1,
7,
197
]
],
"spec_f1_L1Generi": 0.9395648670427075,
"spec_prec_L1Generi": 0.9357945425361156,
"spec_recall_L1Generi": 0.9433656957928802,
"spec_f1_L2Domain": 0.8150470219435737,
"spec_prec_L2Domain": 0.8609271523178808,
"spec_recall_L2Domain": 0.7738095238095238,
"spec_f1_L3Firm-S": 0.8930232558139535,
"spec_prec_L3Firm-S": 0.8609865470852018,
"spec_recall_L3Firm-S": 0.927536231884058,
"spec_f1_L4Quanti": 0.9609756097560975,
"spec_prec_L4Quanti": 0.9704433497536946,
"spec_recall_L4Quanti": 0.9516908212560387,
"spec_qwk": 0.9338562415243872,
"spec_mae": 0.1125,
"spec_kripp_alpha": 0.9206308343112934,
"total_time_s": 19.849480003875215,
"num_samples": 1200,
"avg_ms_per_sample": 16.54123333656268,
"combined_macro_f1": 0.9202028639058946
},
"ensemble-3seed_vs_Opus-4.6": {
"cat_macro_f1": 0.9287535853888995,
"cat_weighted_f1": 0.9277067129478959,
"cat_macro_precision": 0.9242877868683518,
"cat_macro_recall": 0.9368327500295983,
"cat_mcc": 0.9160728021840298,
"cat_auc": 0.9947981532709612,
"cat_ece": 0.06293055539329852,
"cat_confusion_matrix": [
[
211,
0,
1,
1,
1,
0,
0
],
[
0,
78,
0,
0,
1,
0,
0
],
[
8,
0,
145,
1,
3,
0,
1
],
[
0,
0,
1,
139,
1,
0,
0
],
[
13,
0,
8,
13,
173,
1,
5
],
[
1,
10,
1,
4,
3,
209,
0
],
[
0,
0,
0,
1,
6,
1,
159
]
],
"cat_f1_BoardGov": 0.9440715883668904,
"cat_prec_BoardGov": 0.9055793991416309,
"cat_recall_BoardGov": 0.985981308411215,
"cat_f1_Incident": 0.9341317365269461,
"cat_prec_Incident": 0.8863636363636364,
"cat_recall_Incident": 0.9873417721518988,
"cat_f1_Manageme": 0.9235668789808917,
"cat_prec_Manageme": 0.9294871794871795,
"cat_recall_Manageme": 0.9177215189873418,
"cat_f1_NoneOthe": 0.9266666666666666,
"cat_prec_NoneOthe": 0.8742138364779874,
"cat_recall_NoneOthe": 0.9858156028368794,
"cat_f1_RiskMana": 0.8628428927680798,
"cat_prec_RiskMana": 0.9202127659574468,
"cat_recall_RiskMana": 0.812206572769953,
"cat_f1_Strategy": 0.9521640091116174,
"cat_prec_Strategy": 0.990521327014218,
"cat_recall_Strategy": 0.9166666666666666,
"cat_f1_Third-Pa": 0.9578313253012049,
"cat_prec_Third-Pa": 0.9636363636363636,
"cat_recall_Third-Pa": 0.9520958083832335,
"cat_kripp_alpha": 0.9154443888884335,
"spec_macro_f1": 0.8852876459236954,
"spec_weighted_f1": 0.9023972621736004,
"spec_macro_precision": 0.888087338599951,
"spec_macro_recall": 0.8858055716763026,
"spec_mcc": 0.8535145242291756,
"spec_auc": 0.9775733710374438,
"spec_ece": 0.08450941021243728,
"spec_confusion_matrix": [
[
571,
24,
9,
1
],
[
21,
118,
5,
1
],
[
31,
9,
207,
13
],
[
0,
0,
2,
188
]
],
"spec_f1_L1Generi": 0.9299674267100977,
"spec_prec_L1Generi": 0.9165329052969502,
"spec_recall_L1Generi": 0.943801652892562,
"spec_f1_L2Domain": 0.7972972972972973,
"spec_prec_L2Domain": 0.7814569536423841,
"spec_recall_L2Domain": 0.8137931034482758,
"spec_f1_L3Firm-S": 0.8571428571428571,
"spec_prec_L3Firm-S": 0.9282511210762332,
"spec_recall_L3Firm-S": 0.7961538461538461,
"spec_f1_L4Quanti": 0.9567430025445293,
"spec_prec_L4Quanti": 0.9261083743842364,
"spec_recall_L4Quanti": 0.9894736842105263,
"spec_qwk": 0.9247559136673115,
"spec_mae": 0.1325,
"spec_kripp_alpha": 0.910971486983108,
"total_time_s": 19.849480003875215,
"num_samples": 1200,
"avg_ms_per_sample": 16.54123333656268,
"combined_macro_f1": 0.9070206156562974
}
}