---
# Fine-tuning configuration for ModernBERT-large.
# NOTE(review): this file was reconstructed from a copy collapsed onto a
# single line; section membership of each key was inferred from key order --
# confirm against the config loader's schema.
# NOTE(review): all paths are relative; confirm which directory the loader
# resolves them against.

model:
  # Checkpoint the fine-tuning run starts from.
  name_or_path: ../checkpoints/tapt/modernbert-large/final
  # Alternatives for ablation:
  #   base: answerdotai/ModernBERT-large
  #   dapt: ../checkpoints/dapt/modernbert-large/final
  #   tapt: ../checkpoints/tapt/modernbert-large/final

data:
  paragraphs_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  consensus_path: ../data/annotations/v2-stage1/consensus.jsonl
  quality_path: ../data/paragraphs/quality/quality-scores.jsonl
  holdout_path: ../data/gold/v2-holdout-ids.json
  max_seq_length: 512
  validation_split: 0.1

training:
  output_dir: ../checkpoints/finetune/modernbert-large
  # Kept in plain decimal form: PyYAML's YAML 1.1 resolver loads an exponent
  # literal without a dot (e.g. 5e-5) as a string, not a float.
  learning_rate: 0.00005
  num_train_epochs: 3
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  gradient_accumulation_steps: 1
  warmup_ratio: 0.1
  weight_decay: 0.01
  dropout: 0.1
  bf16: true
  gradient_checkpointing: false
  logging_steps: 50
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 42
  loss_type: ce
  # NOTE(review): presumably only consulted when loss_type selects a focal
  # loss; with `ce` it is likely ignored -- verify against the trainer.
  focal_gamma: 2.0
  class_weighting: true
  # Per-head loss weights; both are 1.0 here.
  # NOTE(review): assumes the total loss is a weighted sum of a category term
  # and a specificity term -- confirm in the training code.
  category_loss_weight: 1.0
  specificity_loss_weight: 1.0