
Commit c41de5b

ryan597, pre-commit-ci[bot], carmocca, and Borda committed
Fix load_from_checkpoint to return model on correct device (#17308)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>

(cherry picked from commit e1ce887)
1 parent d6b472d commit c41de5b
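
For context, a minimal sketch of the user-facing behavior this commit fixes (the checkpoint path and CUDA availability are assumptions for illustration):

```python
import torch
from lightning.pytorch.demos.boring_classes import BoringModel

# Remap every tensor in the checkpoint onto the first CUDA device.
model = BoringModel.load_from_checkpoint("checkpoint.ckpt", map_location="cuda:0")

# Before this fix, the returned model always ended up on CPU regardless of
# map_location; after it, the model lives on the device the mapping chose.
assert model.device == torch.device("cuda", 0)
```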

File tree: 4 files changed (+79, -4 lines)

src/pytorch_lightning/CHANGELOG.md

Lines changed: 13 additions & 0 deletions

@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
+
+## [UnReleased] - 2023-04-DD
+
+### Changed
+
+-
+
+
+### Fixed
+
+- Fixed issue where `Model.load_from_checkpoint("checkpoint.ckpt", map_location=map_location)` would always return model on CPU ([#17308](https://github.com/Lightning-AI/lightning/pull/17308))
+
+
 ## [1.9.5] - 2023-03-30
 
 ### Changed

src/pytorch_lightning/core/saving.py

Lines changed: 8 additions & 3 deletions

@@ -154,8 +154,6 @@ def _load_from_checkpoint(
     strict: Optional[bool] = None,
     **kwargs: Any,
 ) -> Union["pl.LightningModule", "pl.LightningDataModule"]:
-    if map_location is None:
-        map_location = cast(_MAP_LOCATION_TYPE, lambda storage, loc: storage)
     with pl_legacy_patch():
         checkpoint = pl_load(checkpoint_path, map_location=map_location)
 
@@ -185,7 +183,14 @@ def _load_from_checkpoint
     if issubclass(cls, pl.LightningDataModule):
         return _load_state(cls, checkpoint, **kwargs)
     if issubclass(cls, pl.LightningModule):
-        return _load_state(cls, checkpoint, strict=strict, **kwargs)
+        storage = _load_state(cls, checkpoint, strict=strict, **kwargs)
+        state_dict = checkpoint["state_dict"]
+        if not state_dict:
+            raise ValueError(f"The state dict in {checkpoint_path!r} contains no parameters.")
+        map_location = list(state_dict.values())[0].device
+        assert isinstance(storage, pl.LightningModule)
+        return storage.to(map_location)
+
     raise NotImplementedError(f"Unsupported {cls}")
 
 
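
The heart of the fix: the removed lines substituted a default `map_location` callable that kept every storage on CPU, and the loaded module was never moved afterwards. The new code lets `torch.load` honor the caller's mapping (or the tensors' saved devices when it is `None`) and then moves the assembled module to the device of the first remapped state-dict tensor. A standalone sketch of that inference, with a hypothetical helper name and a plain `torch` checkpoint layout assumed:

```python
import torch
from torch import nn


def load_on_checkpoint_device(module: nn.Module, checkpoint_path: str, map_location=None) -> nn.Module:
    """Hypothetical helper mirroring the fixed logic above."""
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    state_dict = checkpoint["state_dict"]
    if not state_dict:
        raise ValueError(f"The state dict in {checkpoint_path!r} contains no parameters.")
    module.load_state_dict(state_dict)
    # torch.load has already applied map_location to every tensor, so the
    # first entry's device tells us where the whole module should live.
    device = list(state_dict.values())[0].device
    return module.to(device)
```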

Lines changed: 57 additions & 0 deletions

@@ -0,0 +1,57 @@
+import pytest
+import torch
+
+import lightning.pytorch as pl
+from lightning.pytorch.callbacks import ModelCheckpoint
+from lightning.pytorch.demos.boring_classes import BoringModel
+from tests_pytorch.helpers.runif import RunIf
+
+
+def create_boring_checkpoint(tmp_path, model, accelerator="cuda"):
+    checkpoint_callback = ModelCheckpoint(dirpath=tmp_path, filename="checkpoint")
+    trainer = pl.Trainer(
+        devices=1,
+        accelerator=accelerator,
+        max_epochs=1,
+        enable_model_summary=False,
+        enable_progress_bar=False,
+        callbacks=[checkpoint_callback],
+    )
+    trainer.fit(model)
+
+
+@pytest.mark.parametrize(
+    "map_location", (None, "cpu", torch.device("cpu"), lambda storage, loc: storage, {"cpu": "cpu"})
+)
+def test_load_from_checkpoint_map_location_cpu(tmp_path, map_location):
+    create_boring_checkpoint(tmp_path, BoringModel(), accelerator="cpu")
+    model = BoringModel.load_from_checkpoint(f"{tmp_path}/checkpoint.ckpt", map_location=map_location)
+    assert model.device.type == "cpu"
+
+
+@RunIf(min_cuda_gpus=1)
+@pytest.mark.parametrize(
+    "map_location", (None, "cuda", torch.device("cuda"), lambda storage, loc: storage.cuda(), {"cpu": "cuda"})
+)
+def test_load_from_checkpoint_map_location_gpu(tmp_path, map_location):
+    create_boring_checkpoint(tmp_path, BoringModel(), accelerator="cuda")
+    model = BoringModel.load_from_checkpoint(f"{tmp_path}/checkpoint.ckpt", map_location=map_location)
+    assert model.device.type == "cuda"
+
+
+@RunIf(min_cuda_gpus=1)
+@pytest.mark.parametrize("map_location", ("cpu", torch.device("cpu"), lambda storage, loc: storage, {"cuda": "cpu"}))
+def test_load_from_checkpoint_map_location_gpu_to_cpu(tmp_path, map_location):
+    create_boring_checkpoint(tmp_path, BoringModel(), accelerator="cuda")
+    model = BoringModel.load_from_checkpoint(f"{tmp_path}/checkpoint.ckpt", map_location=map_location)
+    assert model.device.type == "cpu"
+
+
+@RunIf(min_cuda_gpus=1)
+@pytest.mark.parametrize(
+    "map_location", ("cuda", torch.device("cuda"), lambda storage, loc: storage.cuda(), {"cpu": "cuda"})
+)
+def test_load_from_checkpoint_map_location_cpu_to_gpu(tmp_path, map_location):
+    create_boring_checkpoint(tmp_path, BoringModel(), accelerator="cpu")
+    model = BoringModel.load_from_checkpoint(f"{tmp_path}/checkpoint.ckpt", map_location=map_location)
+    assert model.device.type == "cuda"
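
The parametrizations above walk through every `map_location` form that `torch.load` accepts; a quick reference sketch of the same matrix:

```python
import torch

# Each entry is a valid map_location argument, mirroring the test matrix:
map_location_variants = [
    None,                          # restore tensors to the devices they were saved from
    "cpu",                         # device string
    torch.device("cpu"),           # device object
    lambda storage, loc: storage,  # callable over (storage, original location)
    {"cuda:0": "cpu"},             # dict remapping source locations to targets
]
```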

tests/tests_pytorch/strategies/test_ddp_fully_sharded_native.py

Lines changed: 1 addition & 1 deletion

@@ -139,7 +139,7 @@ def _assert_save_equality(trainer, ckpt_path, cls=TestFSDPModel):
 
     # Assert model parameters are identical after loading
     for ddp_param, shard_param in zip(model_state_dict.values(), saved_model.state_dict().values()):
-        assert torch.equal(ddp_param.float().cpu(), shard_param)
+        assert torch.equal(ddp_param, shard_param)
 
 
 @RunIf(min_torch="1.12")
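
The `.float().cpu()` conversion could be dropped presumably because the loaded model now comes back on the checkpoint's original device, so both sides of the comparison already live together. A minimal sketch of the resulting direct-comparison pattern (helper name hypothetical):

```python
import torch
from torch import nn


def state_dicts_equal(a: nn.Module, b: nn.Module) -> bool:
    """Pairwise-compare two modules' parameters, like the test loop above."""
    return all(
        torch.equal(p, q)
        for p, q in zip(a.state_dict().values(), b.state_dict().values())
    )


# Example: a module and a copy that loaded its weights compare equal.
m1 = nn.Linear(4, 4)
m2 = nn.Linear(4, 4)
m2.load_state_dict(m1.state_dict())
assert state_dicts_equal(m1, m2)
```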
