---
# Domain-adaptive pre-training (DAPT) config for NeoBERT via masked-language
# modeling. NOTE(review): presumably consumed by a HF Trainer-style launcher
# that maps the `training:` keys onto TrainingArguments — confirm against the
# training script.
stage: dapt

model:
  name_or_path: chandar-lab/NeoBERT
  # NeoBERT ships custom modeling code on the Hub, so remote code must be
  # trusted for AutoModel loading.
  trust_remote_code: true

data:
  corpus_path: ../data/dapt-corpus
  text_field: text
  max_seq_length: 2048  # NeoBERT supports up to 4096
  validation_split: 0.02

training:
  output_dir: ../checkpoints/dapt/neobert
  learning_rate: 5.0e-5
  mlm_probability: 0.20  # NeoBERT was pre-trained with 20% masking
  num_train_epochs: 1
  per_device_train_batch_size: 6  # smaller model, can fit more per batch
  gradient_accumulation_steps: 5  # effective batch = 30
  warmup_ratio: 0.05
  weight_decay: 0.01
  bf16: true
  gradient_checkpointing: true
  logging_steps: 50
  save_steps: 1000
  eval_steps: 1000
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 42