---
# Fine-tuning configuration for ModernBERT-large on the consensus-labeled
# paragraph dataset. Consumed by the training script; section meanings:
#   model    — which checkpoint to start from
#   data     — input corpora and train/validation split
#   training — HF-Trainer-style hyperparameters and loss settings

model:
  # TAPT (task-adaptive pre-trained) checkpoint is the default starting point.
  name_or_path: ../checkpoints/tapt/modernbert-large/final
  # Alternatives for ablation:
  # base: answerdotai/ModernBERT-large
  # dapt: ../checkpoints/dapt/modernbert-large/final
  # tapt: ../checkpoints/tapt/modernbert-large/final

data:
  paragraphs_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  consensus_path: ../data/annotations/v2-stage1/consensus.jsonl
  quality_path: ../data/paragraphs/quality/quality-scores.jsonl
  # IDs held out for final evaluation; presumably excluded from both
  # train and validation splits — confirm against the data loader.
  holdout_path: ../data/gold/v2-holdout-ids.json
  max_seq_length: 512
  # Fraction of non-holdout data reserved for validation.
  validation_split: 0.1

training:
  output_dir: ../checkpoints/finetune/modernbert-large
  learning_rate: 0.00005
  num_train_epochs: 3
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  gradient_accumulation_steps: 1
  warmup_ratio: 0.1
  weight_decay: 0.01
  dropout: 0.1
  bf16: true
  gradient_checkpointing: false
  logging_steps: 50
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 42
  # loss_type: ce (cross-entropy); focal_gamma only applies if the
  # trainer switches to a focal loss — verify in the training code.
  loss_type: ce
  focal_gamma: 2.0
  class_weighting: true
  # Relative weights of the two task heads' losses.
  category_loss_weight: 1.0
  specificity_loss_weight: 1.0