---
# Configuration for the `dapt` stage (presumably domain-adaptive
# pretraining -- confirm against the consuming script).
#
# NOTE(review): the section nesting below was restored from a flattened
# copy of this file; verify it against the schema the loader expects.
stage: dapt

# Base checkpoint to load.
model:
  name_or_path: answerdotai/ModernBERT-large
  trust_remote_code: false

# Corpus location and preprocessing settings.
data:
  # Relative path; resolved by the consumer (base directory not shown here).
  corpus_path: ../data/dapt-corpus
  text_field: text
  max_seq_length: 8192
  validation_split: 0.02

# Optimisation, precision, and checkpointing settings.
training:
  output_dir: ../checkpoints/dapt/modernbert-large
  # Keep the decimal point and signed exponent: YAML 1.1 loaders (e.g.
  # PyYAML) read `5e-5` as a string, but `5.0e-5` as a float.
  learning_rate: 5.0e-5
  mlm_probability: 0.30
  num_train_epochs: 1
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 32  # effective batch = 1 x 32 = 32 per device
  warmup_ratio: 0.05
  weight_decay: 0.01
  bf16: true
  gradient_checkpointing: true
  logging_steps: 50
  save_steps: 1000
  eval_steps: 1000
  save_total_limit: 3
  dataloader_num_workers: 4
  # NOTE(review): `seed` is assumed to belong to `training`; confirm.
  seed: 42