SEC-cyBERT/python/configs/finetune/modernbert.yaml
2026-04-05 12:16:16 -04:00

37 lines
1.0 KiB
YAML

---
# Fine-tuning configuration for ModernBERT-large.
# Sections: model (checkpoint to load), data (input corpora and splits),
# training (optimizer/runtime hyperparameters and loss settings).

model:
  # Checkpoint loaded for fine-tuning (TAPT = task-adaptive pre-trained).
  name_or_path: ../checkpoints/tapt/modernbert-large/final
  # Alternatives for ablation:
  # base: answerdotai/ModernBERT-large
  # dapt: ../checkpoints/dapt/modernbert-large/final
  # tapt: ../checkpoints/tapt/modernbert-large/final

data:
  paragraphs_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  consensus_path: ../data/annotations/v2-stage1/consensus.jsonl
  quality_path: ../data/paragraphs/quality/quality-scores.jsonl
  holdout_path: ../data/gold/v2-holdout-ids.json
  max_seq_length: 512
  # Fraction of the training data held out for validation.
  validation_split: 0.1

training:
  output_dir: ../checkpoints/finetune/modernbert-large
  learning_rate: 0.00005  # 5e-5
  num_train_epochs: 3
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  gradient_accumulation_steps: 1
  warmup_ratio: 0.1
  weight_decay: 0.01
  dropout: 0.1
  bf16: true
  gradient_checkpointing: false
  logging_steps: 50
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 42
  # Loss configuration: 'ce' = cross-entropy; focal_gamma only applies
  # if loss_type selects a focal loss — NOTE(review): confirm against the
  # training script's accepted loss_type values.
  loss_type: ce
  focal_gamma: 2.0
  class_weighting: true
  # Relative weights for the two task heads' losses.
  category_loss_weight: 1.0
  specificity_loss_weight: 1.0