Fix configs and add two more rerun configs for the base and large RoBERTa models

lang-uk · Oct 3, 2023 · a2b46e4
1 parent dbed625
commit a2b46e4
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 4 deletions.
diff --git a/...imental/roberta/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1.json b/...imental/roberta/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1.json
@@ -0,0 +1,32 @@
+{
+    "output_dir": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1",
+    "model_type": "roberta",
+    "config_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1",
+    "tokenizer_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1",
+    "train_file": "data/malyuk.filter",
+    "max_seq_length": 512,
+    "do_train": true,
+    "do_eval": true,
+    "validation_file": "data/bruk_valid_data.txt",
+    "weight_decay": 1e-1,
+    "per_device_train_batch_size": 4,
+    "per_device_eval_batch_size": 4,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "evaluation_strategy": "steps",
+    "warmup_steps": 9000,
+    "max_steps": 400000,
+    "eval_steps": 500,
+    "save_steps": 50000,
+    "logging_steps": 500,
+    "overwrite_output_dir": true,
+    "num_train_epochs": 0,
+    "adam_beta1": 0.965,
+    "adam_beta2": 0.99,
+    "adam_epsilon": 1e-6,
+    "preprocessing_num_workers": 96,
+    "model_name_or_path": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1.rerun1",
+    "bf16": true,
+    "optimizer": "sophia",
+    "lr_scheduler_type": "linear"
+}
diff --git a/...s/experimental/roberta/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2.json b/...s/experimental/roberta/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2.json
@@ -1,8 +1,8 @@
 {
-    "output_dir": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1",
+    "output_dir": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2",
     "model_type": "roberta",
-    "config_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1",
-    "tokenizer_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1",
+    "config_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2",
+    "tokenizer_name": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2",
     "train_file": "data/malyuk.filter",
     "max_seq_length": 512,
     "do_train": true,
@@ -25,7 +25,7 @@
     "adam_beta2": 0.99,
     "adam_epsilon": 1e-6,
     "preprocessing_num_workers": 96,
-    "model_name_or_path": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun1",
+    "model_name_or_path": "exps/roberta.large.malyuk.filter.wechsel.largedict.tpu.run1.rerun2",
     "bf16": true,
     "optimizer": "sophia",
     "lr_scheduler_type": "linear"

diff --git a/.../experimental/roberta/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1.json b/.../experimental/roberta/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1.json
@@ -0,0 +1,32 @@
+{
+    "output_dir": "exps/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1",
+    "model_type": "roberta",
+    "config_name": "exps/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1",
+    "tokenizer_name": "exps/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1",
+    "train_file": "data/malyuk.filter",
+    "max_seq_length": 512,
+    "do_train": true,
+    "do_eval": true,
+    "validation_file": "data/bruk_valid_data.txt",
+    "weight_decay": 1e-1,
+    "per_device_train_batch_size": 38,
+    "per_device_eval_batch_size": 38,
+    "gradient_accumulation_steps": 1,
+    "learning_rate": 1e-5,
+    "evaluation_strategy": "steps",
+    "warmup_steps": 9000,
+    "max_steps": 400000,
+    "eval_steps": 500,
+    "save_steps": 50000,
+    "logging_steps": 500,
+    "overwrite_output_dir": true,
+    "num_train_epochs": 0,
+    "adam_beta1": 0.965,
+    "adam_beta2": 0.99,
+    "adam_epsilon": 1e-6,
+    "preprocessing_num_workers": 96,
+    "model_name_or_path": "exps/roberta.malyuk.filter.wechsel.largedict.tpu.run7.rerun3.rerun1",
+    "bf16": true,
+    "optimizer": "sophia",
+    "lr_scheduler_type": "linear"
+}