From 0433c4435b7972dcaf2ba337da8d15fb72973e09 Mon Sep 17 00:00:00 2001
From: Dmitry Chaplinsky <dchaplinsky@conversionscience.co.uk>
Date: Tue, 6 Jun 2023 15:47:17 +0300
Subject: [PATCH] Another config for Sophia: reproducing params from their
 GitHub repo

---
 ...ofilter.wechsel.largedict.sophia-run3.json | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json

diff --git a/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json b/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json
new file mode 100644
index 0000000..e0437cf
--- /dev/null
+++ b/configs/experimental/gpt2/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3.json
@@ -0,0 +1,30 @@
+{
+    "output_dir": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+    "model_type": "gpt2",
+    "config_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+    "tokenizer_name": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+    "train_file": "data/oscar.nofilter",
+    "block_size": 1024,
+    "do_train": true,
+    "do_eval": true,
+    "validation_file": "data/bruk_valid_data.txt",
+    "weight_decay": 1e-1,
+    "per_device_train_batch_size": 8,
+    "per_device_eval_batch_size": 8,
+    "gradient_accumulation_steps": 4,
+    "learning_rate": 3e-4,
+    "evaluation_strategy": "steps",
+    "max_steps": 600000,
+    "eval_steps": 500,
+    "save_steps": 12500,
+    "logging_steps": 500,
+    "overwrite_output_dir": true,
+    "num_train_epochs": 0,
+    "warmup_steps": 2000,
+    "model_name_or_path": "exps/gpt2-small.oscar.nofilter.wechsel.largedict.sophia-run3",
+    "preprocessing_num_workers": 48,
+    "fp16": true,
+    "adam_beta1": 0.9,
+    "adam_beta2": 0.95,
+    "lr_scheduler_type": "cosine"
+}
\ No newline at end of file