Skip to content

Commit

Permalink
Another config for Sophia: reproducing the params from its GitHub repository
Browse files (browse the repository at this point in the history)
  • Loading branch information
dchaplinsky committed Jun 6, 2023
1 parent c467bac commit 0433c44
Showing 1 changed file with 30 additions and 0 deletions.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
{
"output_dir": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
"model_type": "gpt2",
"config_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
"tokenizer_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
"train_file": "data/oscar.nofilter",
"block_size": 1024,
"do_train": true,
"do_eval": true,
"validation_file": "data/bruk_valid_data.txt",
"weight_decay": 1e-1,
"per_device_train_batch_size": 8,
"per_device_eval_batch_size": 8,
"gradient_accumulation_steps": 4,
"learning_rate": 3e-4,
"evaluation_strategy": "steps",
"max_steps": 600000,
"eval_steps": 500,
"save_steps": 12500,
"logging_steps": 500,
"overwrite_output_dir": true,
"num_train_epochs": 0,
"warmup_steps": 2000,
"model_name_or_path": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
"preprocessing_num_workers": 48,
"fp16": true,
"adam_beta1": 0.9,
"adam_beta2": 0.95,
"lr_scheduler_type": "cosine"
}

0 comments on commit 0433c44

Please sign in to comment.