File tree 8 files changed +16
-16
lines changed
sota-implementations/impala
tutorials/sphinx-tutorials 8 files changed +16
-16
lines changed Original file line number Diff line number Diff line change @@ -100,7 +100,7 @@ def make_ppo_modules_pixels(proof_environment):
100
100
out_keys = ["common_features" ],
101
101
)
102
102
103
- # Define on head for the policy
103
+ # Define one head for the policy
104
104
policy_net = MLP (
105
105
in_features = common_mlp_output .shape [- 1 ],
106
106
out_features = num_outputs ,
Original file line number Diff line number Diff line change @@ -2136,9 +2136,9 @@ def reset(
2136
2136
self ._assert_tensordict_shape (tensordict )
2137
2137
2138
2138
tensordict_reset = self ._reset (tensordict , ** kwargs )
2139
- # We assume that this is done properly
2140
- # if reset.device != self.device:
2141
- # reset = reset.to(self.device, non_blocking=True)
2139
+ # We assume that this is done properly
2140
+ # if reset.device != self.device:
2141
+ # reset = reset.to(self.device, non_blocking=True)
2142
2142
if tensordict_reset is tensordict :
2143
2143
raise RuntimeError (
2144
2144
"EnvBase._reset should return outplace changes to the input "
Original file line number Diff line number Diff line change @@ -1281,7 +1281,7 @@ class GymEnv(GymWrapper):
1281
1281
pixels_only (bool, optional): if ``True``, only the pixel observations will
1282
1282
be returned (by default under the ``"pixels"`` entry in the output tensordict).
1283
1283
If ``False``, observations (eg, states) and pixels will be returned
1284
- whenever ``from_pixels=True``. Defaults to ``True``.
1284
+ whenever ``from_pixels=True``. Defaults to ``False``.
1285
1285
frame_skip (int, optional): if provided, indicates for how many steps the
1286
1286
same action is to be repeated. The observation returned will be the
1287
1287
last observation of the sequence, whereas the reward will be the sum
Original file line number Diff line number Diff line change 69
69
70
70
71
71
ACTION_MASK_ERROR = RuntimeError (
72
- "An out-of-bounds actions has been provided to an env with an 'action_mask' output."
73
- " If you are using a custom policy, make sure to take the action mask into account when computing the output."
74
- " If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment."
72
+ "An out-of-bounds actions has been provided to an env with an 'action_mask' output. "
73
+ "If you are using a custom policy, make sure to take the action mask into account when computing the output. "
74
+ "If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment. "
75
75
"If you are using a ParallelEnv or another batched inventor, "
76
- "make sure to add the transform to the ParallelEnv (and not to the sub-environments)."
77
- " For more info on using action masks, see the docs at: "
78
- "https://pytorch.org/rl/reference/envs.html#environments-with-masked-actions"
76
+ "make sure to add the transform to the ParallelEnv (and not to the sub-environments). "
77
+ "For more info on using action masks, see the docs at: "
78
+ "https://pytorch.org/rl/main/reference/envs.html#environments-with-masked-actions"
79
79
)
80
80
81
81
Original file line number Diff line number Diff line change @@ -374,8 +374,8 @@ class TanhNormal(FasterTransformedDistribution):
374
374
.. math::
375
375
loc = tanh(loc / upscale) * upscale.
376
376
377
- min (torch.Tensor or number, optional): minimum value of the distribution. Default is -1.0;
378
- max (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0;
377
+ low (torch.Tensor or number, optional): minimum value of the distribution. Default is -1.0;
378
+ high (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0;
379
379
event_dims (int, optional): number of dimensions describing the action.
380
380
Default is 1. Setting ``event_dims`` to ``0`` will result in a log-probability that has the same shape
381
381
as the input, ``1`` will reduce (sum over) the last dimension, ``2`` the last two etc.
Original file line number Diff line number Diff line change @@ -230,7 +230,7 @@ def _fast_vec_gae(
230
230
``[*Batch x TimeSteps x F]``, with ``F`` feature dimensions.
231
231
232
232
"""
233
- # _gen_num_per_traj and _split_and_pad_sequence need
233
+ # _get_num_per_traj and _split_and_pad_sequence need
234
234
# time dimension at last position
235
235
done = done .transpose (- 2 , - 1 )
236
236
terminated = terminated .transpose (- 2 , - 1 )
Original file line number Diff line number Diff line change 128
128
# * :meth:`EnvBase._reset`, which codes for the resetting of the simulator
129
129
# at a (potentially random) initial state;
130
130
# * :meth:`EnvBase._step` which codes for the state transition dynamic;
131
- # * :meth:`EnvBase._set_seed`` which implements the seeding mechanism;
131
+ # * :meth:`EnvBase._set_seed` which implements the seeding mechanism;
132
132
# * the environment specs.
133
133
#
134
134
# Let us first describe the problem at hand: we would like to model a simple
Original file line number Diff line number Diff line change @@ -608,7 +608,7 @@ def env_make(env_name):
608
608
###############################################################################
609
609
# Transforming parallel environments
610
610
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
611
- # There are two equivalent ways of transforming parallen environments: in each
611
+ # There are two equivalent ways of transforming parallel environments: in each
612
612
# process separately, or on the main process. It is even possible to do both.
613
613
# One can therefore think carefully about the transform design to leverage the
614
614
# device capabilities (e.g. transforms on cuda devices) and vectorizing
You can’t perform that action at this time.
0 commit comments