SEC-cyBERT/python/configs/dapt/modernbert.yaml
2026-03-30 11:41:20 -04:00

31 lines
710 B
YAML

# Run configuration for the `dapt` stage (presumably domain-adaptive
# pretraining — confirm against the consuming script) on ModernBERT-large.
stage: dapt

# Base checkpoint the run starts from.
model:
  name_or_path: answerdotai/ModernBERT-large
  trust_remote_code: false

# Corpus location, the record field holding the text, and size limits.
data:
  corpus_path: ../data/dapt-corpus
  text_field: text
  max_seq_length: 8192
  # Ponnock (2025): diminishing returns past 250M tokens.
  # NOTE(review): the cap below is 500M, twice the cited threshold — confirm
  # this is intentional.
  # Written without digit separators: `500_000_000` is an integer only under
  # YAML 1.1 loaders; YAML 1.2 core-schema loaders read it as a string.
  max_tokens: 500000000
  validation_split: 0.02

# Optimizer, schedule, precision, and checkpointing settings.
training:
  output_dir: ../checkpoints/dapt/modernbert-large
  learning_rate: 5.0e-5
  mlm_probability: 0.30
  num_train_epochs: 1
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 8  # effective batch = 32 (4 x 8) per device
  warmup_ratio: 0.05
  weight_decay: 1.0e-5
  bf16: true
  gradient_checkpointing: true
  logging_steps: 50
  # Save and eval share the same step interval.
  save_steps: 256
  eval_steps: 256
  save_total_limit: 8
  dataloader_num_workers: 4
  seed: 42