---
# Configuration for the `dapt` stage: model, data, and training settings.
# One key per line in block style, so each section parses as its own mapping.
stage: dapt

# Model to load.
model:
  name_or_path: answerdotai/ModernBERT-large
  trust_remote_code: false

# Corpus location and preprocessing settings.
# Relative paths: presumably resolved by the consumer — confirm the base dir.
data:
  corpus_path: ../data/dapt-corpus
  text_field: text
  max_seq_length: 2048
  validation_split: 0.02

# Optimisation, precision, and checkpointing settings.
training:
  output_dir: ../checkpoints/dapt/modernbert-large
  # Keep the decimal point and signed exponent: YAML 1.1 loaders (e.g. PyYAML)
  # read `5e-5` as a string, but `5.0e-5` as a float.
  learning_rate: 5.0e-5
  mlm_probability: 0.30
  num_train_epochs: 1
  per_device_train_batch_size: 4
  # effective batch = 4 x 8 = 32 (per device)
  gradient_accumulation_steps: 8
  warmup_ratio: 0.05
  weight_decay: 0.01
  bf16: true
  gradient_checkpointing: true
  logging_steps: 50
  save_steps: 1000
  eval_steps: 1000
  save_total_limit: 3
  dataloader_num_workers: 4
  # NOTE(review): assumed to belong under `training`; confirm against the
  # config loader in case it expects a top-level `seed`.
  seed: 42