diff --git a/pdelfin/train/config/qwen2vl-7b-lora.yaml b/pdelfin/train/config/qwen2vl-7b-lora.yaml
index fb7a64f..aaa471e 100644
--- a/pdelfin/train/config/qwen2vl-7b-lora.yaml
+++ b/pdelfin/train/config/qwen2vl-7b-lora.yaml
@@ -46,7 +46,7 @@ hparams:
   gradient_accumulation_steps: 4
   gradient_checkpointing: true
   clip_grad_norm: 1.0
-  learning_rate: 3e-4
+  learning_rate: 1e-4
   max_steps: 10000
   pad_multiple_of: 16
   log_every_steps: 10
diff --git a/scripts/qwen2vl-7b-gantry.sh b/scripts/qwen2vl-7b-gantry.sh
index 9400aba..42223eb 100755
--- a/scripts/qwen2vl-7b-gantry.sh
+++ b/scripts/qwen2vl-7b-gantry.sh
@@ -10,7 +10,7 @@ then
 fi
 
 
-EXTRA_ARGS="-c pdelfin/train/config/qwen2vl-7b.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/\${BEAKER_USER_ID}\""
+EXTRA_ARGS="-c pdelfin/train/config/qwen2vl-7b.yaml --num_proc 64 --save.path \"/data/jakep/experiments/qwen2vl-pdf/v1/models/\${BEAKER_USER_ID}\""
 
 run_name=$(basename "$0" .sh)
 