# File-viewer listing metadata captured with this config: YAML, 38 lines, 1001 B.
# Model selection.
model:
  # Identifier or path of the pretrained model to load.
  # NOTE(review): looks like a Hugging Face Hub id — confirm how the
  # consuming script resolves it.
  name_or_path: answerdotai/ModernBERT-large

# Dataset inputs and preprocessing settings.
# NOTE(review): all paths are relative; presumably resolved against the
# directory the training script is launched from — confirm.
data:
  paragraphs_path: ../data/paragraphs/paragraphs-clean.patched.jsonl
  consensus_path: ../data/annotations/v2-stage1/consensus.jsonl
  quality_path: ../data/paragraphs/quality/quality-scores.jsonl
  # Presumably the ids excluded from training as a gold hold-out — verify.
  holdout_path: ../data/gold/v2-holdout-ids.json
  # Presumably the maximum sequence length in tokens — verify.
  max_seq_length: 512
  # Presumably the fraction of examples reserved for validation — verify.
  validation_split: 0.1

# Optimisation, runtime, and loss/head settings for the fine-tuning run.
# NOTE(review): several keys share names with Hugging Face TrainingArguments
# fields; presumably forwarded there — confirm against the consuming script.
training:
  # The directory name embeds the same value as `seed` below; keep in sync.
  output_dir: ../checkpoints/finetune/iter1-seed420
  # Keep decimal notation: exponent forms without a dot (e.g. 5e-5) are
  # loaded as strings by YAML 1.1 parsers such as PyYAML.
  learning_rate: 0.00005
  num_train_epochs: 11
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 64
  gradient_accumulation_steps: 1
  warmup_ratio: 0.1
  weight_decay: 0.01
  dropout: 0.1
  bf16: true
  gradient_checkpointing: false
  logging_steps: 50
  save_total_limit: 3
  dataloader_num_workers: 4
  seed: 420

  # Loss configuration.
  loss_type: ce
  # NOTE(review): presumably only consulted when loss_type selects a focal
  # loss; with `ce` it may be ignored — confirm.
  focal_gamma: 2.0
  class_weighting: true
  category_loss_weight: 1.0
  specificity_loss_weight: 1.0

  # Head / pooling configuration.
  specificity_head: independent
  spec_mlp_dim: 256
  pooling: attention
  ordinal_consistency_weight: 0.1
  filter_spec_confidence: true