
Resolve instantiation problem with init_meta_context #10493

Merged: 29 commits, Nov 15, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -121,6 +121,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `CombinedLoader` and `max_size_cycle` not receiving a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374))


- Fixed `isinstance` not working with `init_meta_context`, and the materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/pytorch-lightning/pull/10493))


-


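For context, a minimal sketch of the behavior this changelog entry restores, mirroring the updated test at the bottom of this diff:

```python
# Hedged sketch: both fixes in action. Before this PR, the isinstance
# check below returned False inside init_meta_context.
import torch.nn as nn

from pytorch_lightning.utilities.meta import init_meta_context

with init_meta_context():
    layer = nn.Linear(in_features=1, out_features=1)
    assert isinstance(layer, nn.Linear)        # previously failed under the context
    assert layer.weight.device.type == "meta"  # parameters are still meta tensors
```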
6 changes: 5 additions & 1 deletion pytorch_lightning/core/mixins/device_dtype_mixin.py
@@ -17,6 +17,8 @@
import torch
from torch.nn import Module

import pytorch_lightning as pl


class DeviceDtypeModuleMixin(Module):
    __jit_unused_properties__ = ["device", "dtype"]
@@ -177,7 +179,9 @@ def __update_properties(
        self, device: Optional[torch.device] = None, dtype: Optional[Union[str, torch.dtype]] = None
    ) -> None:
        def apply_fn(module: Union["DeviceDtypeModuleMixin", Module]) -> None:
            if not isinstance(module, DeviceDtypeModuleMixin):
            # TODO: Find out why `isinstance(module, DeviceDtypeModuleMixin)` doesn't
            # work when using `init_meta_device`.
            if not isinstance(module, (DeviceDtypeModuleMixin, pl.LightningModule)):
                return
            if device is not None:
                module._device = device
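The TODO above leaves the root cause open. As a hedged illustration of the machinery involved, here is how the patched `isinstance` from `pytorch_lightning/utilities/meta.py` (further down in this diff) unwraps a generated `_MetaClass` wrapper before the real builtin check runs; `LinearLike` and `_MetaClassLinearLike` are hypothetical stand-ins:

```python
def mock_isinstance(A, B, isinstance=None):
    # when the target is a generated "_MetaClass" wrapper, check against
    # its first base (the original user class) instead
    if isinstance(B, type) and "_MetaClass" in B.__name__:
        return isinstance(A, B.__bases__[0])
    return isinstance(A, B)


class LinearLike:  # hypothetical user module class
    pass


class _MetaClassLinearLike(LinearLike):  # stand-in for the generated wrapper
    pass


# a plain instance passes a check against the wrapper class because the
# patched builtin falls back to the wrapper's first base
assert mock_isinstance(LinearLike(), _MetaClassLinearLike, isinstance=isinstance)
```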
24 changes: 22 additions & 2 deletions pytorch_lightning/trainer/trainer.py
@@ -38,7 +38,14 @@
from pytorch_lightning.loops import PredictionLoop, TrainingBatchLoop, TrainingEpochLoop
from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop
from pytorch_lightning.loops.fit_loop import FitLoop
from pytorch_lightning.plugins import DDPSpawnPlugin, ParallelPlugin, PLUGIN_INPUT, PrecisionPlugin, TrainingTypePlugin
from pytorch_lightning.plugins import (
    DDPSpawnPlugin,
    ParallelPlugin,
    PLUGIN_INPUT,
    PrecisionPlugin,
    TPUSpawnPlugin,
    TrainingTypePlugin,
)
from pytorch_lightning.profiler import (
    AdvancedProfiler,
    BaseProfiler,
@@ -1404,10 +1411,23 @@ def _call_setup_hook(self) -> None:

    def _call_configure_sharded_model(self) -> None:
        with self.accelerator.model_sharded_context():
            materialize_module(self.lightning_module)
            self._handle_meta_model()
            self.call_hook("configure_sharded_model")
            self.call_hook("on_configure_sharded_model")

    def _handle_meta_model(self) -> None:
        param = next(self.lightning_module.parameters())
        if param.device.type != "meta":
            return

        if isinstance(self.training_type_plugin, (DDPSpawnPlugin, TPUSpawnPlugin)):
            raise MisconfigurationException("LightningModule on meta device isn't supported with spawn.")

        materialize_module(self.lightning_module)
        self.lightning_module.trainer = proxy(self)
        # TODO: Find a better place to move the newly materialized model to the device
        self.training_type_plugin.model_to_device()

    def _call_teardown_hook(self) -> None:
        fn = self.state.fn._setup_fn

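A hedged end-to-end sketch of the path `_handle_meta_model` enables; `MyLightningModule` is a hypothetical placeholder for any user `LightningModule`, and the single-GPU `Trainer` arguments are illustrative:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.utilities.meta import init_meta_context

with init_meta_context():
    model = MyLightningModule()  # hypothetical module; parameters land on "meta"

# spawn-based plugins (DDPSpawnPlugin, TPUSpawnPlugin) raise a
# MisconfigurationException for meta models, per the check above
trainer = Trainer(gpus=1)
# during setup, _handle_meta_model materializes the weights, reattaches the
# trainer proxy, and calls model_to_device() to leave the meta device
trainer.fit(model)
```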
18 changes: 14 additions & 4 deletions pytorch_lightning/utilities/meta.py
@@ -25,6 +25,7 @@
from torch.nn import Module
from torch.nn.modules.container import ModuleDict, ModuleList, Sequential

import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_warn
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_10
@@ -191,7 +192,6 @@ def materialize_module(root_module: nn.Module) -> nn.Module:

# cache subclasses to optimize the search when resetting the meta device later on.
__STORAGE_META__ = {}

__CREATED_MODULES__ = set()


@@ -237,7 +237,7 @@ def _set_meta_device() -> None:

    for subclass in get_all_subclasses(torch.nn.modules.module.Module):

        if isinstance(subclass, (Sequential, ModuleList, ModuleDict)):
        if subclass in (Sequential, ModuleList, ModuleDict, pl.LightningModule):
            continue

        # if a subclass has already been stored, we should use the cache
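The one-line change in this hunk is the core bug fix: `get_all_subclasses` yields class objects, not instances, so the old `isinstance(subclass, (Sequential, ...))` check could never be true. A quick self-contained illustration:

```python
from torch.nn import ModuleList, Sequential

# a class object is an instance of `type`, not of itself, so the old
# guard silently never skipped the container classes
assert not isinstance(Sequential, Sequential)
# membership against the class objects is the correct test
assert Sequential in (Sequential, ModuleList)
```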
@@ -267,8 +267,10 @@ def materialize(cls, materialize_fn: Callable):

    @staticmethod
    def add_subclasses(subclass):
        """This is used to unrol the instantion tree while creating the modules."""
        __CREATED_MODULES__.add(subclass)
        """This is used to unroll the instantiation tree while creating the modules."""
        # Don't store the LightningModule, as it is skipped by the meta process.
        if subclass != pl.LightningModule:
            __CREATED_MODULES__.add(subclass)
        if subclass.__bases__[0] != torch.nn.modules.module.Module:
            _MetaClass.add_subclasses(subclass.__bases__[0])
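As a hedged sketch of the recursion above: registering a subclass also registers every ancestor up to, but excluding, `torch.nn.Module`, which is what "unrolling the instantiation tree" means here. `Base` and `Child` are hypothetical:

```python
import torch.nn as nn


class Base(nn.Module):  # hypothetical user hierarchy
    pass


class Child(Base):
    pass


# replicate the walk performed by add_subclasses
registered, cls = [], Child
while cls.__bases__[0] is not nn.Module:
    registered.append(cls)
    cls = cls.__bases__[0]
registered.append(cls)
assert registered == [Child, Base]
```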

@@ -312,12 +314,20 @@ def search(mod: ModuleType) -> List[ModuleType]:
        setattr(mod, subclass.__name__, _MetaClass)


def mock_isinstance(A, B, isinstance=None):
    if isinstance(B, type) and "_MetaClass" in B.__name__:
        return isinstance(A, B.__bases__[0])
    return isinstance(A, B)


@contextmanager
def init_meta_context() -> Generator:
    rank_zero_warn(
        "Be aware this feature is highly experimental and there are a number of weird edge cases "
        "where it can internally assert and/or crash. A more stable version is to be expected from PyTorch 1.11."
    )
    _set_meta_device()
    __builtins__["isinstance"] = partial(mock_isinstance, isinstance=isinstance)
    yield
    __builtins__["isinstance"] = isinstance.keywords["isinstance"]
    _unset_meta_device()
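One detail worth noting in `init_meta_context`: the original builtin is restored through `functools.partial`, which stores its bound keyword arguments in `.keywords`. A minimal self-contained check of that save/restore trick, with `fake_check` as a hypothetical stand-in for `mock_isinstance`:

```python
from functools import partial


def fake_check(A, B, isinstance=None):  # hypothetical stand-in for mock_isinstance
    return isinstance(A, B)


patched = partial(fake_check, isinstance=isinstance)
assert patched.keywords["isinstance"] is isinstance  # original builtin is recoverable
assert patched(1, int)  # delegates to the real builtin check
```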
1 change: 1 addition & 0 deletions tests/utilities/test_meta.py
@@ -36,6 +36,7 @@ def test_init_meta_context():

    with init_meta_context():
        m = nn.Linear(in_features=1, out_features=1)
        assert isinstance(m, nn.Linear)
        assert m.weight.device.type == "meta"
        mlp = MLP(4)
        assert mlp.layer[0].weight.device.type == "meta"