From 0433c4435b7972dcaf2ba337da8d15fb72973e09 Mon Sep 17 00:00:00 2001
From: Dmitry Chaplinsky
Date: Tue, 6 Jun 2023 15:47:17 +0300
Subject: [PATCH] Another config for sophia: reproducing params from their github

---
 ...ofilter.wechsel.largedict.sophia-run3.json | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json

diff --git a/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json b/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json
new file mode 100644
index 0000000..e0437cf
--- /dev/null
+++ b/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json
@@ -0,0 +1,30 @@
+{
+  "output_dir": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+  "model_type": "gpt2",
+  "config_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+  "tokenizer_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+  "train_file": "data/oscar.nofilter",
+  "block_size": 1024,
+  "do_train": true,
+  "do_eval": true,
+  "validation_file": "data/bruk_valid_data.txt",
+  "weight_decay": 1e-1,
+  "per_device_train_batch_size": 8,
+  "per_device_eval_batch_size": 8,
+  "gradient_accumulation_steps": 4,
+  "learning_rate": 3e-4,
+  "evaluation_strategy": "steps",
+  "max_steps": 600000,
+  "eval_steps": 500,
+  "save_steps": 12500,
+  "logging_steps": 500,
+  "overwrite_output_dir": true,
+  "num_train_epochs": 0,
+  "warmup_steps": 2000,
+  "model_name_or_path": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+  "preprocessing_num_workers": 48,
+  "fp16": true,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.95,
+  "lr_scheduler_type": "cosine"
+}
\ No newline at end of file