Rename special to standalone #10779

Merged (3 commits) on Nov 26, 2021
Changes from 1 commit

2 changes: 1 addition & 1 deletion tests/accelerators/test_accelerator_connector.py
@@ -337,7 +337,7 @@ def on_fit_start(self, trainer, pl_module):
trainer.fit(model)


-@RunIf(skip_windows=True, special=True)
+@RunIf(skip_windows=True, standalone=True)
def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir):
"""Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin."""
_test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPPlugin)
2 changes: 1 addition & 1 deletion tests/accelerators/test_ddp.py
@@ -108,7 +108,7 @@ def setup(self, stage: Optional[str] = None) -> None:
trainer.fit(model)


-@RunIf(min_gpus=2, min_torch="1.8.1", special=True)
+@RunIf(min_gpus=2, min_torch="1.8.1", standalone=True)
@pytest.mark.parametrize("precision", (16, 32))
def test_ddp_wrapper(tmpdir, precision):
"""Test parameters to ignore are carried over for DDP."""
4 changes: 2 additions & 2 deletions tests/accelerators/test_multi_nodes_gpu.py
@@ -31,7 +31,7 @@
# TODO(Borda): When multi-node tests are re-enabled (.github/workflows/ci_test-mnodes.yml)
# use an environment variable `PL_RUNNING_MULTINODE_TESTS` and set `RunIf(multinode=True)`
@pytest.mark.skip("Multi-node testing is currently disabled")
-@RunIf(special=True)
+@RunIf(standalone=True)
def test_logging_sync_dist_true_ddp(tmpdir):
"""Tests to ensure that the sync_dist flag works with CPU (should just return the original value)"""
fake_result = 1
@@ -68,7 +68,7 @@ def validation_step(self, batch, batch_idx):
# TODO(Borda): When multi-node tests are re-enabled (.github/workflows/ci_test-mnodes.yml)
# use an environment variable `PL_RUNNING_MULTINODE_TESTS` and set `RunIf(multinode=True)`
@pytest.mark.skip("Multi-node testing is currently disabled")
-@RunIf(special=True)
+@RunIf(standalone=True)
def test__validation_step__log(tmpdir):
"""Tests that validation_step can log."""

2 changes: 1 addition & 1 deletion tests/callbacks/test_pruning.py
@@ -160,7 +160,7 @@ def test_pruning_callback(
)


-@RunIf(special=True, min_gpus=2)
+@RunIf(standalone=True, min_gpus=2)
@pytest.mark.parametrize("parameters_to_prune", (False, True))
@pytest.mark.parametrize("use_global_unstructured", (False, True))
def test_pruning_callback_ddp(tmpdir, parameters_to_prune, use_global_unstructured):
2 changes: 1 addition & 1 deletion tests/callbacks/test_stochastic_weight_avg.py
@@ -138,7 +138,7 @@ def train_with_swa(
assert trainer.lightning_module == model


-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
def test_swa_callback_ddp(tmpdir):
train_with_swa(tmpdir, strategy="ddp", gpus=2)

2 changes: 1 addition & 1 deletion tests/callbacks/test_tqdm_progress_bar.py
@@ -512,7 +512,7 @@ def test_tqdm_progress_bar_can_be_pickled():
pickle.dumps(bar)


-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
@pytest.mark.parametrize(
["total_train_samples", "train_batch_size", "total_val_samples", "val_batch_size", "val_check_interval"],
[(8, 4, 2, 1, 0.2), (8, 4, 2, 1, 0.5)],
@@ -87,7 +87,7 @@ def training_step(self, batch, batch_idx):


@mock.patch("torch.save")
-@RunIf(special=True, min_gpus=2)
+@RunIf(standalone=True, min_gpus=2)
@pytest.mark.parametrize(["k", "epochs", "val_check_interval", "expected"], [(1, 1, 1.0, 1), (2, 2, 0.3, 4)])
def test_top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected):
class TestModel(BoringModel):
8 changes: 4 additions & 4 deletions tests/conftest.py
@@ -172,13 +172,13 @@ def single_process_pg():


def pytest_collection_modifyitems(items):
if os.getenv("PL_RUNNING_SPECIAL_TESTS", "0") != "1":
if os.getenv("PL_RUN_STANDALONE_TESTS", "0") != "1":
return
-# filter out non-special tests
+# filter out non-standalone tests
items[:] = [
item
for item in items
for marker in item.own_markers
-# has `@RunIf(special=True)`
-if marker.name == "skipif" and marker.kwargs.get("special")
+# has `@RunIf(standalone=True)`
+if marker.name == "skipif" and marker.kwargs.get("standalone")
]
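
For context on how this hook is exercised: when PL_RUN_STANDALONE_TESTS=1 is exported, the collection hook above keeps only tests whose marker carries standalone=True; without the flag, RunIf skips them instead. A minimal usage sketch with a hypothetical test name, assuming the test-suite helper paths seen elsewhere in this diff (tests/helpers/runif.py, tests/helpers/boring_model.py):

# Sketch only; the helper imports follow this repository's test-suite layout.
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf


@RunIf(min_gpus=2, standalone=True)
def test_example_standalone(tmpdir):
    # Skipped unless PL_RUN_STANDALONE_TESTS=1; with the flag set, the
    # pytest_collection_modifyitems hook above drops every non-standalone
    # test so this one can run in its own process.
    model = BoringModel()
    assert model is not None
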
2 changes: 1 addition & 1 deletion tests/core/test_metric_result_integration.py
@@ -480,7 +480,7 @@ def test_result_collection_reload_1_gpu_ddp(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=1)


-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
def test_result_collection_reload_2_gpus(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=2)
12 changes: 6 additions & 6 deletions tests/helpers/runif.py
@@ -65,7 +65,7 @@ def __new__(
horovod: bool = False,
horovod_nccl: bool = False,
skip_windows: bool = False,
-special: bool = False,
+standalone: bool = False,
fairscale: bool = False,
fairscale_fully_sharded: bool = False,
deepspeed: bool = False,
@@ -87,7 +87,7 @@ def __new__(
horovod: if Horovod is installed
horovod_nccl: if Horovod is installed with NCCL support
skip_windows: skip test for Windows platform (typically for some limited torch functionality)
-special: running in special mode, outside pytest suit
+standalone: Mark the test as standalone, our CI will run it in a separate process.
fairscale: if `fairscale` module is required to run the test
fairscale_fully_sharded: if `fairscale` fully sharded module is required to run the test
deepspeed: if `deepspeed` module is required to run the test
@@ -146,12 +146,12 @@ def __new__(
conditions.append(not _HOROVOD_NCCL_AVAILABLE)
reasons.append("Horovod with NCCL")

-if special:
-env_flag = os.getenv("PL_RUNNING_SPECIAL_TESTS", "0")
+if standalone:
+env_flag = os.getenv("PL_RUN_STANDALONE_TESTS", "0")
conditions.append(env_flag != "1")
reasons.append("Special execution")
reasons.append("Standalone execution")
# used in tests/conftest.py::pytest_collection_modifyitems
kwargs["special"] = True
kwargs["standalone"] = True

if fairscale:
conditions.append(not _FAIRSCALE_AVAILABLE)
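
In effect, RunIf(standalone=True) still resolves to a pytest.mark.skipif marker; the extra keyword rides along in the marker's kwargs so that tests/conftest.py can detect it during collection. A rough, simplified illustration of the marker it produces (the real helper also folds in every other RunIf condition and reason):

# Simplified sketch, not the actual implementation in tests/helpers/runif.py.
import os

import pytest

standalone_only = pytest.mark.skipif(
    condition=os.getenv("PL_RUN_STANDALONE_TESTS", "0") != "1",
    reason="Standalone execution",
    standalone=True,  # read back via marker.kwargs.get("standalone") in conftest.py
)
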
2 changes: 1 addition & 1 deletion tests/lite/test_lite.py
@@ -380,7 +380,7 @@ def test_autocast():
lite._precision_plugin.forward_context().__exit__.assert_called()


-@RunIf(min_gpus=2, deepspeed=True, special=True)
+@RunIf(min_gpus=2, deepspeed=True, standalone=True)
def test_deepspeed_multiple_models():
class Lite(LightningLite):
def run(self):
2 changes: 1 addition & 1 deletion tests/lite/test_parity.py
@@ -190,7 +190,7 @@ def test_boring_lite_model_ddp_spawn(precision, strategy, devices, accelerator,
assert torch.equal(w_pure.cpu(), w_lite.cpu())


-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
@pytest.mark.parametrize(
"precision, strategy, devices, accelerator",
[
4 changes: 2 additions & 2 deletions tests/models/test_hooks.py
@@ -167,7 +167,7 @@ def transfer_batch_to_device(self, batch, device, dataloader_idx):
assert torch.allclose(batch_gpu.targets.cpu(), torch.ones(5, 1, dtype=torch.long) * 2)


-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
def test_transfer_batch_hook_ddp(tmpdir):
"""Test custom data are properly moved to the right device using ddp."""

@@ -426,7 +426,7 @@ def _predict_batch(trainer, model, batches):
return out


-@RunIf(deepspeed=True, min_gpus=1, special=True)
+@RunIf(deepspeed=True, min_gpus=1, standalone=True)
@pytest.mark.parametrize("automatic_optimization", (True, False))
def test_trainer_model_hook_system_fit_deepspeed(tmpdir, automatic_optimization):
_run_trainer_model_hook_system_fit(
2 changes: 1 addition & 1 deletion tests/models/test_sync_batchnorm.py
@@ -67,7 +67,7 @@ def configure_optimizers(self):

# TODO: Fatal Python error: Bus error
@pytest.mark.skip(reason="Fatal Python error: Bus error")
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, standalone=True)
def test_sync_batchnorm_ddp(tmpdir):
seed_everything(234)
set_random_main_port()
2 changes: 1 addition & 1 deletion tests/plugins/environments/torch_elastic_deadlock.py
@@ -7,7 +7,7 @@
from pytorch_lightning.utilities.exceptions import DeadlockDetectedException
from tests.helpers.boring_model import BoringModel

if os.getenv("PL_RUNNING_SPECIAL_TESTS", "0") == "1" and os.getenv("PL_RECONCILE_PROCESS", "0") == "1":
if os.getenv("PL_RUN_STANDALONE_TESTS", "0") == "1" and os.getenv("PL_RECONCILE_PROCESS", "0") == "1":

class CustomException(Exception):
pass
2 changes: 1 addition & 1 deletion tests/plugins/test_amp_plugins.py
@@ -190,7 +190,7 @@ def configure_optimizers(self):
trainer.fit(model)


-@RunIf(min_gpus=2, amp_apex=True, special=True)
+@RunIf(min_gpus=2, amp_apex=True, standalone=True)
@pytest.mark.parametrize("amp_level", ["O2"])
def test_amp_apex_ddp_fit(amp_level, tmpdir):
class CustomBoringModel(BoringModel):
@@ -89,7 +89,7 @@ def _assert_layer_fsdp_instance(self) -> None:
assert self.layer.module[2].reshard_after_forward is True


-@RunIf(min_gpus=1, skip_windows=True, fairscale_fully_sharded=True, special=True)
+@RunIf(min_gpus=1, skip_windows=True, fairscale_fully_sharded=True, standalone=True)
def test_fully_sharded_plugin_checkpoint(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run."""

@@ -98,7 +98,7 @@ def test_fully_sharded_plugin_checkpoint(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))


-@RunIf(min_gpus=2, skip_windows=True, fairscale_fully_sharded=True, special=True)
+@RunIf(min_gpus=2, skip_windows=True, fairscale_fully_sharded=True, standalone=True)
def test_fully_sharded_plugin_checkpoint_multi_gpus(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs, and all stages can be run."""

@@ -136,7 +136,7 @@ def _run_multiple_stages(trainer, model, model_path: Optional[str] = None):
trainer.test(ckpt_path=model_path)


-@RunIf(min_gpus=1, skip_windows=True, fairscale_fully_sharded=True, special=True)
+@RunIf(min_gpus=1, skip_windows=True, fairscale_fully_sharded=True, standalone=True)
def test_fsdp_gradient_clipping_raises(tmpdir):
"""Test to ensure that an exception is raised when clipping gradients by value with FSDP."""
model = BoringModel()
4 changes: 2 additions & 2 deletions tests/plugins/test_ddp_plugin.py
@@ -33,7 +33,7 @@ def on_train_start(self) -> None:
self.start_cuda_memory = torch.cuda.memory_allocated()


-@RunIf(skip_windows=True, min_gpus=2, special=True)
+@RunIf(skip_windows=True, min_gpus=2, standalone=True)
def test_ddp_with_2_gpus():
"""Tests if device is set correctely when training and after teardown for DDPPlugin."""
trainer = Trainer(gpus=2, strategy="ddp", fast_dev_run=True)
@@ -64,7 +64,7 @@ def on_train_start(self):
self.trainer.training_type_plugin.barrier("barrier after model is wrapped")


-@RunIf(min_gpus=4, special=True)
+@RunIf(min_gpus=4, standalone=True)
@mock.patch("torch.distributed.barrier")
def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir):
"""Test correct usage of barriers when device ids do not start at 0 or are not consecutive."""
10 changes: 5 additions & 5 deletions tests/plugins/test_ddp_plugin_with_comm_hook.py
@@ -26,7 +26,7 @@
import torch.distributed.algorithms.ddp_comm_hooks.post_localSGD_hook as post_localSGD


@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, special=True)
@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, standalone=True)
def test_ddp_fp16_compress_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()
@@ -46,7 +46,7 @@ def test_ddp_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, special=True)
@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, standalone=True)
def test_ddp_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()
@@ -69,7 +69,7 @@ def test_ddp_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, special=True)
@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, standalone=True)
def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress wrapper for SGD hook."""
model = BoringModel()
@@ -93,7 +93,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, special=True)
@RunIf(skip_windows=True, min_torch="1.9.0", min_gpus=2, standalone=True)
def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
"""Test for DDP Spawn FP16 compress hook."""
model = BoringModel()
@@ -110,7 +110,7 @@ def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"


@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, special=True)
@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, standalone=True)
def test_ddp_post_local_sgd_comm_hook(tmpdir):
"""Test for DDP post-localSGD hook."""
model = BoringModel()