[Doc] Minor fixes to comments and docstrings (#2443)

thomasbbrunner · web-flow · commit c94d07f3b614 · 2024-09-30T14:28:38.000+01:00
diff --git a/sota-implementations/impala/utils.py b/sota-implementations/impala/utils.py
@@ -100,7 +100,7 @@ def make_ppo_modules_pixels(proof_environment):
         out_keys=["common_features"],
     )
 
-    # Define on head for the policy
+    # Define one head for the policy
     policy_net = MLP(
         in_features=common_mlp_output.shape[-1],
         out_features=num_outputs,
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -2136,9 +2136,9 @@ def reset(
             self._assert_tensordict_shape(tensordict)
 
         tensordict_reset = self._reset(tensordict, **kwargs)
-        #        We assume that this is done properly
-        #        if reset.device != self.device:
-        #            reset = reset.to(self.device, non_blocking=True)
+        # We assume that this is done properly
+        # if reset.device != self.device:
+        #     reset = reset.to(self.device, non_blocking=True)
         if tensordict_reset is tensordict:
             raise RuntimeError(
                 "EnvBase._reset should return outplace changes to the input "
diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -1281,7 +1281,7 @@ class GymEnv(GymWrapper):
         pixels_only (bool, optional): if ``True``, only the pixel observations will
             be returned (by default under the ``"pixels"`` entry in the output tensordict).
             If ``False``, observations (eg, states) and pixels will be returned
-            whenever ``from_pixels=True``. Defaults to ``True``.
+            whenever ``from_pixels=True``. Defaults to ``False``.
         frame_skip (int, optional): if provided, indicates for how many steps the
             same action is to be repeated. The observation returned will be the
             last observation of the sequence, whereas the reward will be the sum
diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py
@@ -69,13 +69,13 @@
 
 
 ACTION_MASK_ERROR = RuntimeError(
-    "An out-of-bounds actions has been provided to an env with an 'action_mask' output."
-    " If you are using a custom policy, make sure to take the action mask into account when computing the output."
-    " If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment."
+    "An out-of-bounds actions has been provided to an env with an 'action_mask' output. "
+    "If you are using a custom policy, make sure to take the action mask into account when computing the output. "
+    "If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment. "
     "If you are using a ParallelEnv or another batched inventor, "
-    "make sure to add the transform to the ParallelEnv (and not to the sub-environments)."
-    " For more info on using action masks, see the docs at: "
-    "https://pytorch.org/rl/reference/envs.html#environments-with-masked-actions"
+    "make sure to add the transform to the ParallelEnv (and not to the sub-environments). "
+    "For more info on using action masks, see the docs at: "
+    "https://pytorch.org/rl/main/reference/envs.html#environments-with-masked-actions"
 )
 
 
diff --git a/torchrl/modules/distributions/continuous.py b/torchrl/modules/distributions/continuous.py
@@ -374,8 +374,8 @@ class TanhNormal(FasterTransformedDistribution):
             .. math::
                 loc = tanh(loc / upscale) * upscale.
 
-        min (torch.Tensor or number, optional): minimum value of the distribution. Default is -1.0;
-        max (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0;
+        low (torch.Tensor or number, optional): minimum value of the distribution. Default is -1.0;
+        high (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0;
         event_dims (int, optional): number of dimensions describing the action.
             Default is 1. Setting ``event_dims`` to ``0`` will result in a log-probability that has the same shape
             as the input, ``1`` will reduce (sum over) the last dimension, ``2`` the last two etc.
diff --git a/torchrl/objectives/value/functional.py b/torchrl/objectives/value/functional.py
@@ -230,7 +230,7 @@ def _fast_vec_gae(
     ``[*Batch x TimeSteps x F]``, with ``F`` feature dimensions.
 
     """
-    # _gen_num_per_traj and _split_and_pad_sequence need
+    # _get_num_per_traj and _split_and_pad_sequence need
     # time dimension at last position
     done = done.transpose(-2, -1)
     terminated = terminated.transpose(-2, -1)
diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py
@@ -128,7 +128,7 @@
 # * :meth:`EnvBase._reset`, which codes for the resetting of the simulator
 #   at a (potentially random) initial state;
 # * :meth:`EnvBase._step` which codes for the state transition dynamic;
-# * :meth:`EnvBase._set_seed`` which implements the seeding mechanism;
+# * :meth:`EnvBase._set_seed` which implements the seeding mechanism;
 # * the environment specs.
 #
 # Let us first describe the problem at hand: we would like to model a simple
diff --git a/tutorials/sphinx-tutorials/torchrl_envs.py b/tutorials/sphinx-tutorials/torchrl_envs.py
@@ -608,7 +608,7 @@ def env_make(env_name):
 ###############################################################################
 # Transforming parallel environments
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# There are two equivalent ways of transforming parallen environments: in each
+# There are two equivalent ways of transforming parallel environments: in each
 # process separately, or on the main process. It is even possible to do both.
 # One can therefore think carefully about the transform design to leverage the
 # device capabilities (e.g. transforms on cuda devices) and vectorizing

Original file line number	Diff line number	Diff line change
`@@ -100,7 +100,7 @@ def make_ppo_modules_pixels(proof_environment):`
`100`	`100`	`out_keys=["common_features"],`
`101`	`101`	`)`
`102`	`102`
`103`		`- # Define on head for the policy`
	`103`	`+ # Define one head for the policy`
`104`	`104`	`policy_net = MLP(`
`105`	`105`	`in_features=common_mlp_output.shape[-1],`
`106`	`106`	`out_features=num_outputs,`
Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@`
`128`	`128`	``# * :meth:`EnvBase._reset`, which codes for the resetting of the simulator``
`129`	`129`	`# at a (potentially random) initial state;`
`130`	`130`	``# * :meth:`EnvBase._step` which codes for the state transition dynamic;``
`131`		```-# * :meth:`EnvBase._set_seed`` which implements the seeding mechanism;```
	`131`	``+# * :meth:`EnvBase._set_seed` which implements the seeding mechanism;``
`132`	`132`	`# * the environment specs.`
`133`	`133`	`#`
`134`	`134`	`# Let us first describe the problem at hand: we would like to model a simple`