
Commit

adding more test configs
sweinbach committed Apr 29, 2021
1 parent 6f5079f commit 1dae917
Showing 3 changed files with 9 additions and 8 deletions.
6 changes: 3 additions & 3 deletions configs/small.yml
@@ -2,8 +2,8 @@
 {
    # parallelism settings ( you will want to change these based on your cluster setup, ideally scheduling pipeline stages
    # across the node boundaries )
-   "pipe-parallel-size": 1,
-   "model-parallel-size": 1,
+   "pipe-parallel-size": 0,
+   "model-parallel-size": 2,
 
    # model settings
    "num-layers": 12,
@@ -41,7 +41,7 @@
   },
 
   # batch / data settings
-  "train_micro_batch_size_per_gpu": 4,
+  "train_micro_batch_size_per_gpu": 8,
   "data-impl": "mmap",
   "split": "949,50,1",
 
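Note on the configs/small.yml change: `"pipe-parallel-size": 0` disables pipeline parallelism outright (no pipeline stages are built), while `"model-parallel-size": 2` splits each layer's weights across two GPUs, and the per-GPU micro-batch doubles from 4 to 8. The sketch below illustrates how these degrees determine the data-parallel replica count and the effective global batch in a DeepSpeed-style setup; the function name and the world size of 8 are assumptions for the example, not the repo's API.

```python
# Illustrative sketch only: how pipe/model parallel degrees relate to the
# data-parallel degree and global batch under a DeepSpeed-style launcher.

def data_parallel_degree(world_size: int, pipe_parallel: int, model_parallel: int) -> int:
    # a pipe-parallel size of 0 behaves like "no pipeline", i.e. one stage
    pipe = max(pipe_parallel, 1)
    assert world_size % (pipe * model_parallel) == 0, "degrees must divide the GPU count"
    return world_size // (pipe * model_parallel)

dp = data_parallel_degree(world_size=8, pipe_parallel=0, model_parallel=2)  # -> 4 replicas
global_batch = 8 * dp  # train_micro_batch_size_per_gpu (8) x data-parallel ranks,
                       # assuming gradient_accumulation_steps == 1
print(dp, global_batch)  # 4 32
```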
7 changes: 4 additions & 3 deletions tests/model/test_model_checkpoint.py
@@ -65,9 +65,10 @@ def run_checkpoint_test(self, config_yml):
         # save model checkpoint
         save_checkpoint(42, model, optimizer, lr_scheduler)
 
-        if args.pipe_parallel_size == 1 and isinstance(model, PipelineEngine):
-            # if it's a pipe parallel model but not actually doing parallelism, convert it to a normal deepspeed model
-            model = pipe_to_normal(model)
+        #if args.pipe_parallel_size == 1 and isinstance(model, PipelineEngine):
+        #    # if it's a pipe parallel model but not actually doing parallelism, convert it to a normal deepspeed model
+        #    model = pipe_to_normal(model)
+        #model.to_sequential()
         model.eval()
 
         context_tokens_tensor = torch.cuda.LongTensor([[1,2,3,4,5],[1,2,3,4,5],[6,7,8,9,10],[1,2,3,4,100]])
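The commented-out block is the old unwrapping step: with `"pipe-parallel-size": 0` the test now builds a plain DeepSpeed engine rather than a degenerate single-stage `PipelineEngine`, so no conversion is needed before `model.eval()`. For reference, a hedged sketch of what such a guard looks like; `pipe_to_normal` and `to_sequential` are the repo's helpers named in the diff, everything else here is an assumption rather than the repo's exact code.

```python
# Sketch under assumptions: unwrap a one-stage pipeline so it can be used
# for ordinary eval-time forward passes.
from deepspeed.runtime.pipe.engine import PipelineEngine

def maybe_unwrap_pipeline(model, pipe_parallel_size: int):
    if pipe_parallel_size == 1 and isinstance(model, PipelineEngine):
        # a single-stage pipeline pays PipelineEngine's scheduling overhead
        # without doing any inter-stage communication, so convert it back to
        # a normal sequential module; to_sequential() is the helper the
        # commit's commented line refers to
        model = model.module.to_sequential()  # assumes module exposes to_sequential()
    return model
```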
4 changes: 2 additions & 2 deletions tests/neox_args/test_neoxargs_commandline.py
@@ -70,7 +70,7 @@ def test_neoxargs_consume_deepy_args_with_config_dir(self):
 
         self.assertTrue(args_loaded_yamls == args_loaded_consume)
 
-    def test_neoxargs_consume_megatron_args(self):
+    def test_neoxargs_consume_neox_args(self):
         """
         verify megatron args are correctly consumed after sending via deepspeed
         """
@@ -83,7 +83,7 @@ def test_neoxargs_consume_megatron_args(self):
 
         # patch sys.argv so that args can be access by set_global_variables within initialize_megatron
         with patch('sys.argv', deepspeed_main_args):
-            args_loaded = NeoXArgs.consume_megatron_args()
+            args_loaded = NeoXArgs.consume_neox_args()
 
         #TODO is the wandb group really to be changed?
         args_loaded.wandb_group = args_baseline.wandb_group
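The rename from `consume_megatron_args` to `consume_neox_args` tracks the repo-wide move from Megatron-style naming to `NeoXArgs`. Below is a minimal sketch of the sys.argv-patching pattern the test relies on; the entry-point name and argument values are placeholders, and only `NeoXArgs.consume_neox_args()` comes from the diff.

```python
# Minimal sketch of the test's pattern: patch sys.argv so the args parser
# sees what a real deepspeed launch would pass on the command line.
from unittest.mock import patch

from megatron.neox_arguments import NeoXArgs  # assumed import path

deepspeed_main_args = ["pretrain.py", "--hypothetical_flag", "value"]  # placeholder argv

with patch("sys.argv", deepspeed_main_args):
    # consume_neox_args() reads the patched command line exactly as it would
    # read the command line of a deepspeed-launched training run
    args_loaded = NeoXArgs.consume_neox_args()
```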
