# SEC-cyBERT/python/configs/tapt/modernbert.yaml
# 2026-03-30 19:46:20 -04:00
#
# 31 lines
# 925 B
# YAML

# Training configuration for the `tapt` stage of ModernBERT-large.
# `model.name_or_path` points at the `final` directory under the DAPT
# checkpoints, and this stage writes to its own directory under
# `checkpoints/tapt`. Every path is relative ("../..."); the directory they
# are resolved against is decided by the consuming script — TODO confirm.
stage: tapt

# Starting weights for this stage.
model:
  name_or_path: ../checkpoints/dapt/modernbert-large/final
  trust_remote_code: false

# Training corpus. The `.jsonl` extension suggests one JSON record per line,
# with the text presumably read from the key named by `text_field` — verify
# against the data loader.
data:
  corpus_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  text_field: text
  max_seq_length: 512  # 99.6% of paragraphs fit; mean=127, P99=386
  validation_split: 0.05  # larger val split — small dataset

# Optimisation, masking, precision, and checkpointing settings.
# NOTE(review): key names look like Hugging Face `TrainingArguments` fields
# plus MLM collator options — confirm how the training script maps them.
training:
  output_dir: ../checkpoints/tapt/modernbert-large
  learning_rate: 5.0e-5
  mlm_probability: 0.30
  whole_word_mask: true
  num_train_epochs: 5
  per_device_train_batch_size: 32  # 22.7 GB peak w/ torch.compile at seq_len=512
  gradient_accumulation_steps: 1  # effective batch = 32 (matches DAPT)
  warmup_ratio: 0.05
  weight_decay: 1.0e-5
  bf16: true
  gradient_checkpointing: false  # short sequences, not needed
  logging_steps: 50
  # Saving and evaluation both run once per epoch.
  save_strategy: epoch
  eval_strategy: epoch
  save_total_limit: 6  # keep all 5 epoch checkpoints + final
  dataloader_num_workers: 4
  seed: 42