diff --git a/CHANGELOG.md b/CHANGELOG.md index b0ebb9a37aa4c..ee523731092b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- Fixed `NeptuneLogger` when using DDP ([#11030](https://github.com/PyTorchLightning/pytorch-lightning/pull/11030)) - Fixed a bug to disable logging hyperparameters in logger if there are no hparams ([#11105](https://github.com/PyTorchLightning/pytorch-lightning/issues/11105)) - Fixed an issue when torch-scripting a `LightningModule` after training with `Trainer(sync_batchnorm=True)` ([#11078](https://github.com/PyTorchLightning/pytorch-lightning/pull/11078)) - Fixed an `AttributeError` occuring when using a `CombinedLoader` (multiple dataloaders) for prediction ([#11111](https://github.com/PyTorchLightning/pytorch-lightning/pull/11111)) diff --git a/environment.yml b/environment.yml index d7d34c387af15..7e3c84e913f9e 100644 --- a/environment.yml +++ b/environment.yml @@ -51,4 +51,4 @@ dependencies: - mlflow>=1.0.0 - comet_ml>=3.1.12 - wandb>=0.8.21 - - neptune-client>=0.4.109 + - neptune-client>=0.10.0 diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index 97876afe65368..2e7031ff46c4f 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -44,7 +44,7 @@ from neptune.new.types import File as NeptuneFile except ModuleNotFoundError: import neptune - from neptune.exceptions import NeptuneLegacyProjectException + from neptune.exceptions import NeptuneLegacyProjectException, NeptuneOfflineModeFetchException from neptune.run import Run from neptune.types import File as NeptuneFile else: @@ -266,51 +266,64 @@ def __init__( prefix: str = "training", **neptune_run_kwargs, ): - # verify if user passed proper init arguments self._verify_input_arguments(api_key, project, name, run, neptune_run_kwargs) + if neptune is None: + raise ModuleNotFoundError( + "You want to use the `Neptune` logger which is not installed yet, install it with" + " `pip install neptune-client`." + ) super().__init__() self._log_model_checkpoints = log_model_checkpoints self._prefix = prefix + self._run_name = name + self._project_name = project + self._api_key = api_key + self._run_instance = run + self._neptune_run_kwargs = neptune_run_kwargs + self._run_short_id = None - self._run_instance = self._init_run_instance(api_key, project, name, run, neptune_run_kwargs) + if self._run_instance is not None: + self._retrieve_run_data() - self._run_short_id = self.run._short_id # skipcq: PYL-W0212 + # make sure that we've log integration version for outside `Run` instances + self._run_instance[_INTEGRATION_VERSION_KEY] = __version__ + + def _retrieve_run_data(self): try: - self.run.wait() + self._run_instance.wait() + self._run_short_id = self.run._short_id # skipcq: PYL-W0212 self._run_name = self._run_instance["sys/name"].fetch() except NeptuneOfflineModeFetchException: self._run_name = "offline-name" - def _init_run_instance(self, api_key, project, name, run, neptune_run_kwargs) -> Run: - if run is not None: - run_instance = run - else: - try: - run_instance = neptune.init( - project=project, - api_token=api_key, - name=name, - **neptune_run_kwargs, - ) - except NeptuneLegacyProjectException as e: - raise TypeError( - f"""Project {project} has not been migrated to the new structure. - You can still integrate it with the Neptune logger using legacy Python API - available as part of neptune-contrib package: - - https://docs-legacy.neptune.ai/integrations/pytorch_lightning.html\n - """ - ) from e - - # make sure that we've log integration version for both newly created and outside `Run` instances - run_instance[_INTEGRATION_VERSION_KEY] = __version__ - - # keep api_key and project, they will be required when resuming Run for pickled logger - self._api_key = api_key - self._project_name = run_instance._project_name # skipcq: PYL-W0212 + @property + def _neptune_init_args(self): + args = {} + # Backward compatibility in case of previous version retrieval + try: + args = self._neptune_run_kwargs + except AttributeError: + pass + + if self._project_name is not None: + args["project"] = self._project_name + + if self._api_key is not None: + args["api_token"] = self._api_key - return run_instance + if self._run_short_id is not None: + args["run"] = self._run_short_id + + # Backward compatibility in case of previous version retrieval + try: + if self._run_name is not None: + args["name"] = self._run_name + except AttributeError: + pass + + return args def _construct_path_with_prefix(self, *keys) -> str: """Return sequence of keys joined by `LOGGER_JOIN_CHAR`, started with `_prefix` if defined.""" @@ -379,7 +392,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state - self._run_instance = neptune.init(project=self._project_name, api_token=self._api_key, run=self._run_short_id) + self._run_instance = neptune.init(**self._neptune_init_args) @property @rank_zero_experiment @@ -412,8 +425,23 @@ def training_step(self, batch, batch_idx): return self.run @property + @rank_zero_experiment def run(self) -> Run: - return self._run_instance + try: + if not self._run_instance: + self._run_instance = neptune.init(**self._neptune_init_args) + self._retrieve_run_data() + # make sure that we've log integration version for newly created + self._run_instance[_INTEGRATION_VERSION_KEY] = __version__ + + return self._run_instance + except NeptuneLegacyProjectException as e: + raise TypeError( + f"Project {self._project_name} has not been migrated to the new structure." + " You can still integrate it with the Neptune logger using legacy Python API" + " available as part of neptune-contrib package:" + " https://docs-legacy.neptune.ai/integrations/pytorch_lightning.html\n" + ) from e @rank_zero_only def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None: # skipcq: PYL-W0221 @@ -473,13 +501,13 @@ def log_metrics(self, metrics: Dict[str, Union[torch.Tensor, float]], step: Opti for key, val in metrics.items(): # `step` is ignored because Neptune expects strictly increasing step values which - # Lighting does not always guarantee. - self.experiment[key].log(val) + # Lightning does not always guarantee. + self.run[key].log(val) @rank_zero_only def finalize(self, status: str) -> None: if status: - self.experiment[self._construct_path_with_prefix("status")] = status + self.run[self._construct_path_with_prefix("status")] = status super().finalize(status) @@ -493,12 +521,14 @@ def save_dir(self) -> Optional[str]: """ return os.path.join(os.getcwd(), ".neptune") + @rank_zero_only def log_model_summary(self, model, max_depth=-1): model_str = str(ModelSummary(model=model, max_depth=max_depth)) - self.experiment[self._construct_path_with_prefix("model/summary")] = neptune.types.File.from_content( + self.run[self._construct_path_with_prefix("model/summary")] = neptune.types.File.from_content( content=model_str, extension="txt" ) + @rank_zero_only def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpoint]") -> None: """Automatically log checkpointed model. Called after model checkpoint callback saves a new checkpoint. @@ -515,35 +545,33 @@ def after_save_checkpoint(self, checkpoint_callback: "ReferenceType[ModelCheckpo if checkpoint_callback.last_model_path: model_last_name = self._get_full_model_name(checkpoint_callback.last_model_path, checkpoint_callback) file_names.add(model_last_name) - self.experiment[f"{checkpoints_namespace}/{model_last_name}"].upload(checkpoint_callback.last_model_path) + self.run[f"{checkpoints_namespace}/{model_last_name}"].upload(checkpoint_callback.last_model_path) # save best k models for key in checkpoint_callback.best_k_models.keys(): model_name = self._get_full_model_name(key, checkpoint_callback) file_names.add(model_name) - self.experiment[f"{checkpoints_namespace}/{model_name}"].upload(key) + self.run[f"{checkpoints_namespace}/{model_name}"].upload(key) # log best model path and checkpoint if checkpoint_callback.best_model_path: - self.experiment[ - self._construct_path_with_prefix("model/best_model_path") - ] = checkpoint_callback.best_model_path + self.run[self._construct_path_with_prefix("model/best_model_path")] = checkpoint_callback.best_model_path model_name = self._get_full_model_name(checkpoint_callback.best_model_path, checkpoint_callback) file_names.add(model_name) - self.experiment[f"{checkpoints_namespace}/{model_name}"].upload(checkpoint_callback.best_model_path) + self.run[f"{checkpoints_namespace}/{model_name}"].upload(checkpoint_callback.best_model_path) # remove old models logged to experiment if they are not part of best k models at this point - if self.experiment.exists(checkpoints_namespace): - exp_structure = self.experiment.get_structure() + if self.run.exists(checkpoints_namespace): + exp_structure = self.run.get_structure() uploaded_model_names = self._get_full_model_names_from_exp_structure(exp_structure, checkpoints_namespace) for file_to_drop in list(uploaded_model_names - file_names): - del self.experiment[f"{checkpoints_namespace}/{file_to_drop}"] + del self.run[f"{checkpoints_namespace}/{file_to_drop}"] # log best model score if checkpoint_callback.best_model_score: - self.experiment[self._construct_path_with_prefix("model/best_model_score")] = ( + self.run[self._construct_path_with_prefix("model/best_model_score")] = ( checkpoint_callback.best_model_score.cpu().detach().numpy() ) @@ -637,13 +665,11 @@ def log_artifact(self, artifact: str, destination: Optional[str] = None) -> None self._signal_deprecated_api_usage("log_artifact", f"logger.run['{key}].log('path_to_file')") self.run[key].log(destination) - @rank_zero_only def set_property(self, *args, **kwargs): self._signal_deprecated_api_usage( "log_artifact", f"logger.run['{self._prefix}/{self.PARAMETERS_KEY}/key'].log(value)", raise_exception=True ) - @rank_zero_only def append_tags(self, *args, **kwargs): self._signal_deprecated_api_usage( "append_tags", "logger.run['sys/tags'].add(['foo', 'bar'])", raise_exception=True diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index 3bedd26621238..b12f7c8286c62 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -47,6 +47,8 @@ def _get_logger_args(logger_class, save_dir): logger_args.update(offline_mode=True) if "offline" in inspect.getfullargspec(logger_class).args: logger_args.update(offline=True) + if issubclass(logger_class, NeptuneLogger): + logger_args.update(mode="offline") return logger_args @@ -320,7 +322,9 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): @RunIf(skip_windows=True, skip_49370=True, skip_hanging_spawn=True) -@pytest.mark.parametrize("logger_class", [CometLogger, CSVLogger, MLFlowLogger, TensorBoardLogger, TestTubeLogger]) +@pytest.mark.parametrize( + "logger_class", [CometLogger, CSVLogger, MLFlowLogger, NeptuneLogger, TensorBoardLogger, TestTubeLogger] +) def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0.""" _patch_comet_atexit(monkeypatch) diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index cb7ef9c5156cc..ddea7b2419608 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -77,7 +77,7 @@ def tmpdir_unittest_fixture(request, tmpdir): class TestNeptuneLogger(unittest.TestCase): def test_neptune_online(self, neptune): logger = NeptuneLogger(api_key="test", project="project") - created_run_mock = logger._run_instance + created_run_mock = logger.run self.assertEqual(logger._run_instance, created_run_mock) self.assertEqual(logger.name, "Run test name") @@ -109,7 +109,7 @@ def test_neptune_pickling(self, neptune): pickled_logger = pickle.dumps(logger) unpickled = pickle.loads(pickled_logger) - neptune.init.assert_called_once_with(project="test-project", api_token=None, run="TEST-42") + neptune.init.assert_called_once_with(name="Test name", run=unpickleable_run._short_id) self.assertIsNotNone(unpickled.experiment) @patch("pytorch_lightning.loggers.neptune.Run", Run) @@ -360,7 +360,7 @@ def test_legacy_functions(self, neptune, neptune_file_mock, warnings_mock): logger = NeptuneLogger(api_key="test", project="project") # test deprecated functions which will be shut down in pytorch-lightning 1.7.0 - attr_mock = logger._run_instance.__getitem__ + attr_mock = logger.run.__getitem__ attr_mock.reset_mock() fake_image = {}