diff --git a/python/configs/dapt/modernbert.yaml b/python/configs/dapt/modernbert.yaml index d7242a1..c5bf2fb 100644 --- a/python/configs/dapt/modernbert.yaml +++ b/python/configs/dapt/modernbert.yaml @@ -25,6 +25,6 @@ training: logging_steps: 50 save_steps: 256 eval_steps: 256 - save_total_limit: 3 + save_total_limit: 8 dataloader_num_workers: 4 seed: 42 diff --git a/python/src/dapt/train.py b/python/src/dapt/train.py index be4aff9..92d7476 100644 --- a/python/src/dapt/train.py +++ b/python/src/dapt/train.py @@ -131,6 +131,7 @@ def train(config: DAPTConfig) -> None: torch_compile=True, optim="adamw_torch_fused", tf32=True, + per_device_eval_batch_size=1, dataloader_persistent_workers=True, logging_steps=config.training.logging_steps, save_steps=config.training.save_steps,