# SEC-cyBERT/python/configs/finetune/iter1-seed69.yaml
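# Fine-tuning run config: iteration 1, random seed 69 (matches the file name).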

model:
  name_or_path: answerdotai/ModernBERT-large
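# Data inputs; the descriptions below are inferred from the file names and may
# not match the repo's exact semantics:
#   paragraphs_path - cleaned paragraph texts (JSONL)
#   consensus_path  - consensus labels from the v2 stage-1 annotation round
#   quality_path    - per-paragraph quality scores
#   holdout_path    - paragraph ids reserved as the v2 gold holdout set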
data:
  paragraphs_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  consensus_path: ../data/annotations/v2-stage1/consensus.jsonl
  quality_path: ../data/paragraphs/quality/quality-scores.jsonl
  holdout_path: ../data/gold/v2-holdout-ids.json
  max_seq_length: 512
  validation_split: 0.1
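# Trainer hyperparameters. The standard fields appear to mirror Hugging Face
# TrainingArguments names; the remaining fields are presumably consumed by the
# repo's own training script.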
training:
  output_dir: ../checkpoints/finetune/iter1-seed69
  learning_rate: 0.00005
  num_train_epochs: 11
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  gradient_accumulation_steps: 1
  warmup_ratio: 0.1
  weight_decay: 0.01
  dropout: 0.1
  bf16: true
  gradient_checkpointing: false
  logging_steps: 50
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 69
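  # Multi-task loss and head settings; the interpretations below are assumptions:
  # - loss_type "ce" selects plain cross-entropy; focal_gamma presumably only
  #   applies when loss_type is "focal", so it is likely inert in this run.
  # - class_weighting likely reweights classes by label frequency.
  # - category_loss_weight / specificity_loss_weight balance the two heads.
  # - specificity_head "independent" suggests a separate MLP head (hidden size
  #   spec_mlp_dim) rather than one conditioned on the category prediction.
  # - pooling "attention" suggests attention pooling over token embeddings
  #   instead of the [CLS] token.
  # - ordinal_consistency_weight presumably scales an auxiliary penalty on
  #   out-of-order specificity predictions; filter_spec_confidence presumably
  #   drops low-confidence specificity labels from training.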
  loss_type: ce
  focal_gamma: 2.0
  class_weighting: true
  category_loss_weight: 1.0
  specificity_loss_weight: 1.0
  specificity_head: independent
  spec_mlp_dim: 256
  pooling: attention
  ordinal_consistency_weight: 0.1
  filter_spec_confidence: true