
Merge branch 'master' into fix/neptune-ddp
Raalsky authored Dec 17, 2021
2 parents d31bd7b + 62f1e82 commit 6c819c4
Showing 28 changed files with 348 additions and 381 deletions.
21 changes: 19 additions & 2 deletions CHANGELOG.md
@@ -120,12 +120,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed duplicated file extension when uploading model checkpoints with `NeptuneLogger` ([#11015](https://github.com/PyTorchLightning/pytorch-lightning/pull/11015))


- Removed `__getstate__` and `__setstate__` of `RichProgressBar` ([#11100](https://github.com/PyTorchLightning/pytorch-lightning/pull/11100))


- The `DDPPlugin` and `DDPSpawnPlugin` and their subclasses now remove the `SyncBatchNorm` wrappers in `teardown()` to enable proper support at inference after fitting ([#11078](https://github.com/PyTorchLightning/pytorch-lightning/pull/11078))


- Moved ownership of the `Accelerator` instance to the `TrainingTypePlugin`; all training-type plugins now take an optional parameter `accelerator` ([#11022](https://github.com/PyTorchLightning/pytorch-lightning/pull/11022))


- Marked the `ResultCollection`, `ResultMetric`, and `ResultMetricCollection` classes as protected ([#11130](https://github.com/PyTorchLightning/pytorch-lightning/pull/11130))


- DeepSpeed no longer requires the `LightningModule` to be partitioned with ZeRO Stage 3 ([#10655](https://github.com/PyTorchLightning/pytorch-lightning/pull/10655))


@@ -164,6 +170,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated `Trainer.run_stage` in favor of `Trainer.{fit,validate,test,predict}` ([#11000](https://github.com/PyTorchLightning/pytorch-lightning/pull/11000))


- Deprecated `Trainer.verbose_evaluate` in favor of `EvaluationLoop(verbose=...)` ([#10931](https://github.com/PyTorchLightning/pytorch-lightning/pull/10931))


- Deprecated `Trainer.should_rank_save_checkpoint` Trainer property ([#11068](https://github.com/PyTorchLightning/pytorch-lightning/pull/11068))

### Removed
@@ -284,10 +293,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Fixed

- Fixed `NeptuneLogger` when using DDP ([#11030](https://github.com/PyTorchLightning/pytorch-lightning/pull/11030))


- Fixed security vulnerabilities CVE-2020-1747 and CVE-2020-14343 caused by the `PyYAML` dependency ([#11099](https://github.com/PyTorchLightning/pytorch-lightning/pull/11099))


- Fixed hyperparameter logging so that nothing is sent to the logger when there are no hparams ([#11105](https://github.com/PyTorchLightning/pytorch-lightning/issues/11105))


- Avoid the deprecated `onnx.export(example_outputs=...)` in torch 1.10 ([#11116](https://github.com/PyTorchLightning/pytorch-lightning/pull/11116))
@@ -296,9 +308,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed an issue when torch-scripting a `LightningModule` after training with `Trainer(sync_batchnorm=True)` ([#11078](https://github.com/PyTorchLightning/pytorch-lightning/pull/11078))


- Fixed an `AttributeError` occurring when using a `CombinedLoader` (multiple dataloaders) for prediction ([#11111](https://github.com/PyTorchLightning/pytorch-lightning/pull/11111))


- Fixed bug where `Trainer(track_grad_norm=..., logger=False)` would fail ([#11114](https://github.com/PyTorchLightning/pytorch-lightning/pull/11114))


- Fixed double evaluation bug with fault-tolerance enabled where the second call was completely skipped ([#11119](https://github.com/PyTorchLightning/pytorch-lightning/pull/11119))

## [1.5.6] - 2021-12-15

### Fixed
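For context on the branch being merged, below is a hedged sketch of the configuration the headline fix targets: `NeptuneLogger` combined with DDP training. It is illustrative only and not code from this PR; the credentials, project name, and device counts are placeholders, and it assumes the `neptune-client` package is installed.

```python
# Illustrative setup only -- placeholder credentials and device counts,
# not code taken from this PR; requires the neptune-client package.
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import NeptuneLogger

neptune_logger = NeptuneLogger(
    api_key="ANONYMOUS",           # placeholder -- substitute your own API token
    project="my-workspace/my-project",  # placeholder workspace/project name
)
trainer = Trainer(logger=neptune_logger, gpus=2, strategy="ddp", max_epochs=1)
# The changelog entry above (#11030) records the fix that makes this
# logger/strategy combination behave correctly across DDP processes.
```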
11 changes: 0 additions & 11 deletions pytorch_lightning/callbacks/progress/rich_progress.py
@@ -318,17 +318,6 @@ def on_validation_start(self, trainer, pl_module):
super().on_validation_start(trainer, pl_module)
self._init_progress(trainer)

def __getstate__(self):
# can't pickle the rich progress objects
state = self.__dict__.copy()
state["progress"] = None
state["_console"] = None
return state

def __setstate__(self, state):
self.__dict__ = state
self._console = Console(**self._console_kwargs)

def on_sanity_check_start(self, trainer, pl_module):
super().on_sanity_check_start(trainer, pl_module)
self._init_progress(trainer)
2 changes: 1 addition & 1 deletion pytorch_lightning/core/lightning.py
@@ -351,7 +351,7 @@ def log(
results = self.trainer._results
if results is None:
raise MisconfigurationException(
"You are trying to `self.log()` but the loop `ResultCollection` is not registered"
"You are trying to `self.log()` but the loop's result collection is not registered"
" yet. This is most likely because you are trying to log in a `predict` hook,"
" but it doesn't support logging"
)
2 changes: 1 addition & 1 deletion pytorch_lightning/core/mixins/hparams_mixin.py
@@ -28,7 +28,7 @@ class HyperparametersMixin:

def __init__(self) -> None:
super().__init__()
self._log_hyperparams = True
self._log_hyperparams = False

def save_hyperparameters(
self,
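A hedged aside on the `_log_hyperparams` default change above, which backs the changelog entry about skipping hyperparameter logging when there are no hparams: loggers should only receive hyperparameters from modules that actually register some, typically via `save_hyperparameters()`. A minimal sketch using the standard `LightningModule` API (the `LitModel` class is hypothetical):

```python
# Minimal sketch: only modules that call `save_hyperparameters()` have anything
# for the logger to record; per the fix above, modules without hparams are skipped.
import pytorch_lightning as pl
import torch


class LitModel(pl.LightningModule):
    def __init__(self, lr: float = 1e-3, hidden: int = 32):
        super().__init__()
        self.save_hyperparameters()  # populates self.hparams with lr/hidden
        self.layer = torch.nn.Linear(hidden, 1)


model = LitModel()
print(model.hparams)  # "lr": 0.001, "hidden": 32
```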
2 changes: 1 addition & 1 deletion pytorch_lightning/core/saving.py
@@ -344,7 +344,7 @@ def load_hparams_from_yaml(config_yaml: str, use_omegaconf: bool = True) -> Dict
return {}

with fs.open(config_yaml, "r") as fp:
hparams = yaml.load(fp, Loader=yaml.UnsafeLoader)
hparams = yaml.full_load(fp)

if _OMEGACONF_AVAILABLE:
if use_omegaconf:
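A short standalone illustration of the loader swap above (not part of the diff): `yaml.full_load` (PyYAML >= 5.1) resolves standard YAML tags but refuses to construct arbitrary Python objects, which is the attack vector behind the CVEs mentioned in the changelog entry.

```python
# Standalone demo of the difference behind `yaml.load(..., Loader=yaml.UnsafeLoader)`
# vs `yaml.full_load(...)`; requires PyYAML >= 5.1.
import yaml

hparams_yaml = "learning_rate: 0.001\nbatch_size: 32\n"
print(yaml.full_load(hparams_yaml))  # {'learning_rate': 0.001, 'batch_size': 32}

payload = "!!python/object/apply:os.system ['echo unsafe']"
try:
    yaml.full_load(payload)  # rejected: full_load will not build arbitrary Python objects
except yaml.YAMLError as err:
    print(f"rejected by full_load: {err}")
```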
14 changes: 6 additions & 8 deletions pytorch_lightning/loops/base.py
@@ -19,7 +19,7 @@
from torchmetrics import Metric

import pytorch_lightning as pl
from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
from pytorch_lightning.trainer.connectors.logger_connector.result import _ResultCollection
from pytorch_lightning.trainer.progress import BaseProgress
from pytorch_lightning.utilities.enums import _FaultTolerantMode
from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -282,7 +282,7 @@ def state_dict(self, destination: Optional[Dict] = None, prefix: str = "") -> Di
destination[key] = v.state_dict()
elif isinstance(v, Loop):
v.state_dict(destination, key + ".")
elif isinstance(v, ResultCollection):
elif isinstance(v, _ResultCollection):
# sync / unsync metrics
v.sync()
destination[key] = v.state_dict()
@@ -312,7 +312,7 @@ def _load_from_state_dict(self, state_dict: Dict, prefix: str, metrics: Optional
if isinstance(v, BaseProgress):
v.load_state_dict(state_dict[key])
elif (
isinstance(v, ResultCollection)
isinstance(v, _ResultCollection)
and self.trainer is not None
and self.trainer.lightning_module is not None
):
@@ -324,10 +324,10 @@ def _load_from_state_dict(self, state_dict: Dict, prefix: str, metrics: Optional
if metrics:
metric_attributes.update(metrics)

# The `ResultCollection` objects have 2 types of metrics: `Tensor` and `torchmetrics.Metric`.
# The `_ResultCollection` objects have 2 types of metrics: `Tensor` and `torchmetrics.Metric`.
# When creating a checkpoint, the `Metric`s are dropped from the loop `state_dict` to serialize only
# Python primitives. However, their states are saved with the model's `state_dict`.
# On reload, we need to re-attach the `Metric`s back to the `ResultCollection`.
# On reload, we need to re-attach the `Metric`s back to the `_ResultCollection`.
# The references are provided through the `metric_attributes` dictionary.
v.load_state_dict(
state_dict[key], metrics=metric_attributes, sync_fn=self.trainer.training_type_plugin.reduce
@@ -337,6 +337,4 @@ def _load_from_state_dict(self, state_dict: Dict, prefix: str, metrics: Optional
v.reset(metrics=False)

self.on_load_checkpoint(state_dict[prefix + "state_dict"])

if _FaultTolerantMode.detect_current_mode().is_enabled:
self.restarting = True
self.restarting = True
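The comment block in `_load_from_state_dict` above explains that `torchmetrics.Metric` objects are dropped from the loop's `state_dict` and re-attached on reload because their state travels with the model. A standalone sketch of where that state actually lives (assumes `torchmetrics` is installed; the `BatchCounter` metric is hypothetical, not part of the codebase):

```python
# Standalone sketch (not Lightning internals): a torchmetrics.Metric keeps its
# running state in buffers registered via `add_state`, which is why the loop's
# state_dict only needs a reference to the metric, not a serialized copy.
import torch
from torchmetrics import Metric


class BatchCounter(Metric):
    """Hypothetical metric counting processed samples."""

    def __init__(self) -> None:
        super().__init__()
        self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum")

    def update(self, batch_size: int) -> None:
        self.count += batch_size

    def compute(self) -> torch.Tensor:
        return self.count


metric = BatchCounter()
metric.update(4)
metric.update(8)
# the running state lives on the metric itself; per the loop comment above,
# Lightning saves it with the model and only re-attaches a reference in the loop
print(metric.compute())  # tensor(12)
```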
19 changes: 9 additions & 10 deletions pytorch_lightning/loops/dataloader/evaluation_loop.py
@@ -19,19 +19,20 @@

from pytorch_lightning.loops.dataloader import DataLoaderLoop
from pytorch_lightning.loops.epoch import EvaluationEpochLoop
from pytorch_lightning.trainer.connectors.logger_connector.result import _OUT_DICT, ResultCollection
from pytorch_lightning.trainer.connectors.logger_connector.result import _OUT_DICT, _ResultCollection
from pytorch_lightning.trainer.states import RunningStage, TrainerFn
from pytorch_lightning.utilities.types import EPOCH_OUTPUT


class EvaluationLoop(DataLoaderLoop):
"""Loops over all dataloaders for evaluation."""

def __init__(self) -> None:
def __init__(self, verbose: bool = True) -> None:
super().__init__()
self.epoch_loop = EvaluationEpochLoop()
self.verbose = verbose

self._results = ResultCollection(training=False)
self._results = _ResultCollection(training=False)
self._outputs: List[EPOCH_OUTPUT] = []
self._logged_outputs: List[_OUT_DICT] = []
self._max_batches: List[int] = []
@@ -84,6 +85,10 @@ def reset(self) -> None:
self._max_batches = [self._max_batches] * len(self.dataloaders)

super().reset()
# when restarting, if we are running `validate` or `test` twice, since there's no concept of `max_epochs` we
# need to reset the current state when the loop has finished running
if self.done and self.trainer.state.fn != TrainerFn.FITTING:
self.dataloader_progress.reset_on_run()

def on_skip(self) -> List:
return []
@@ -156,13 +161,7 @@ def on_run_end(self) -> List[_OUT_DICT]:
# enable train mode again
self._on_evaluation_model_train()

if (
self.trainer.state.fn not in (TrainerFn.FITTING, TrainerFn.TUNING)
and not self.trainer.sanity_checking
and self.trainer.is_global_zero
# TODO: this should be defined in this loop, not the Trainer
and self.trainer.verbose_evaluate
):
if self.verbose and self.trainer.is_global_zero:
assert self.trainer.state.stage is not None
self._print_results(logged_outputs, self.trainer.state.stage)

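As the changelog deprecation of `Trainer.verbose_evaluate` indicates, verbosity now lives on the loop itself. A minimal sketch of the new constructor argument; note the loop is normally created by the `Trainer`, so constructing it directly here is for illustration only.

```python
# Illustration only: the Trainer normally constructs this loop itself.
from pytorch_lightning.loops import EvaluationLoop

quiet_loop = EvaluationLoop(verbose=False)  # suppresses the printed results table
print(quiet_loop.verbose)  # False
```

In day-to-day use the flag is expected to be driven by the `verbose` argument of `Trainer.validate(...)` / `Trainer.test(...)` rather than set by hand.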
7 changes: 6 additions & 1 deletion pytorch_lightning/loops/dataloader/prediction_loop.py
@@ -70,10 +70,15 @@ def connect(self, epoch_loop: PredictionEpochLoop) -> None: # type: ignore[over

def reset(self) -> None:
"""Resets the internal state of the loop for a new run."""
super().reset()
self.predictions = []
self.epoch_batch_indices = []

super().reset()
# when restarting, if we are running twice, since there's no concept of `max_epochs` we need to reset the
# current state when the loop has finished running
if self.done:
self.dataloader_progress.reset_on_run()

def on_run_start(self) -> None: # type: ignore[override]
"""Calls ``_on_predict_start`` hook."""
self._on_predict_start()
5 changes: 5 additions & 0 deletions pytorch_lightning/loops/epoch/evaluation_epoch_loop.py
@@ -22,6 +22,7 @@
from pytorch_lightning.loops.base import Loop
from pytorch_lightning.loops.utilities import _update_dataloader_iter
from pytorch_lightning.trainer.progress import BatchProgress
from pytorch_lightning.trainer.states import TrainerFn
from pytorch_lightning.trainer.supporters import CombinedLoader
from pytorch_lightning.utilities.auto_restart import (
_collect_states_on_rank_zero_over_collection,
@@ -67,6 +68,10 @@ def reset(self) -> None:
self.batch_progress.reset_on_run()
else:
self.batch_progress.reset_on_restart()
# when restarting, if we are running `validate` or `test` twice, since there's no concept of `max_epochs` we
# need to reset the current state when the loop has finished running
if self.done and self.trainer.state.fn != TrainerFn.FITTING:
self.batch_progress.reset_on_run()

def on_run_start( # type: ignore[override]
self, data_fetcher: AbstractDataFetcher, dataloader_idx: Optional[int], dl_max_batches: int
3 changes: 2 additions & 1 deletion pytorch_lightning/loops/epoch/prediction_epoch_loop.py
@@ -162,7 +162,8 @@ def _build_kwargs(self, batch: Any, batch_idx: int, dataloader_idx: int) -> Dict
def _get_batch_indices(self, dataloader_idx: int) -> List[List[int]]:
"""Returns a reference to the seen batch indices if the dataloader has a batch sampler wrapped by our
:class:`~pytorch_lightning.overrides.distributed.IndexBatchSamplerWrapper`."""
batch_sampler = self.trainer.predict_dataloaders[dataloader_idx].batch_sampler
# the batch_sampler may not be defined in the case of CombinedDataLoaders
batch_sampler = getattr(self.trainer.predict_dataloaders[dataloader_idx], "batch_sampler", None)
if isinstance(batch_sampler, IndexBatchSamplerWrapper) and self.should_store_predictions:
return batch_sampler.seen_batch_indices

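The one-line fix above relies on a defensive attribute lookup: a `CombinedLoader`-style wrapper may not expose `batch_sampler`, so `getattr` with a `None` default avoids the `AttributeError` reported in #11111. A tiny standalone sketch of the pattern (the wrapper class is a hypothetical stand-in):

```python
# Hypothetical stand-in for a loader wrapper that lacks a `batch_sampler` attribute.
class WrappedLoaders:
    pass


loader = WrappedLoaders()
# direct attribute access would raise AttributeError; the default keeps prediction going
batch_sampler = getattr(loader, "batch_sampler", None)
print(batch_sampler)  # None
```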
6 changes: 3 additions & 3 deletions pytorch_lightning/loops/epoch/training_epoch_loop.py
@@ -21,7 +21,7 @@
from pytorch_lightning.loops.batch import TrainingBatchLoop
from pytorch_lightning.loops.batch.training_batch_loop import _OUTPUTS_TYPE as _BATCH_OUTPUTS_TYPE
from pytorch_lightning.loops.utilities import _get_active_optimizers, _is_max_limit_reached, _update_dataloader_iter
from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
from pytorch_lightning.trainer.connectors.logger_connector.result import _ResultCollection
from pytorch_lightning.trainer.progress import BatchProgress, SchedulerProgress
from pytorch_lightning.utilities import rank_zero_warn
from pytorch_lightning.utilities.apply_func import apply_to_collection
@@ -63,9 +63,9 @@ def __init__(self, min_steps: Optional[int] = None, max_steps: int = -1) -> None
self.scheduler_progress = SchedulerProgress()

self.batch_loop = TrainingBatchLoop()
self.val_loop = loops.EvaluationLoop()
self.val_loop = loops.EvaluationLoop(verbose=False)

self._results = ResultCollection(training=True)
self._results = _ResultCollection(training=True)
self._outputs: _OUTPUTS_TYPE = []
self._warning_cache = WarningCache()
self._dataloader_iter: Optional[Iterator] = None
4 changes: 2 additions & 2 deletions pytorch_lightning/loops/fit_loop.py
@@ -17,7 +17,7 @@
from pytorch_lightning.loops import Loop
from pytorch_lightning.loops.epoch import TrainingEpochLoop
from pytorch_lightning.loops.utilities import _is_max_limit_reached
from pytorch_lightning.trainer.connectors.logger_connector.result import ResultCollection
from pytorch_lightning.trainer.connectors.logger_connector.result import _ResultCollection
from pytorch_lightning.trainer.progress import Progress
from pytorch_lightning.trainer.supporters import TensorRunningAccum
from pytorch_lightning.utilities import rank_zero_deprecation
@@ -136,7 +136,7 @@ def _skip_backward(self, value: bool) -> None:
self.epoch_loop.batch_loop.optimizer_loop._skip_backward = value

@property
def _results(self) -> ResultCollection:
def _results(self) -> _ResultCollection:
if self.trainer.training:
return self.epoch_loop._results
if self.trainer.validating: