
Commit 3cc6253

Author: Vincent Moens
Commit message: Update
[ghstack-poisoned]
2 parents 7827a41 + 13238ad, commit 3cc6253


64 files changed: +567 / -224 lines

.github/unittest/linux/scripts/environment.yml

Lines changed: 2 additions & 2 deletions
@@ -24,8 +24,8 @@ dependencies:
 - tensorboard
 - imageio==2.26.0
 - wandb
-- dm_control<1.0.21
-- mujoco<3.2.1
+- dm_control
+- mujoco
 - mlflow
 - av
 - coverage

.github/unittest/linux/scripts/run_all.sh

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ echo "installing gymnasium"
 pip3 install "gymnasium"
 pip3 install ale_py
 pip3 install mo-gymnasium[mujoco] # requires here bc needs mujoco-py
-pip3 install "mujoco<3.2.1" -U
+pip3 install "mujoco" -U

 # sanity check: remove?
 python3 -c """

.github/unittest/linux_distributed/scripts/environment.yml

Lines changed: 2 additions & 2 deletions
@@ -23,8 +23,8 @@ dependencies:
 - tensorboard
 - imageio==2.26.0
 - wandb
-- dm_control<1.0.21
-- mujoco<3.2.1
+- dm_control
+- mujoco
 - mlflow
 - av
 - coverage

.github/unittest/linux_examples/scripts/environment.yml

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ dependencies:
 - scipy
 - hydra-core
 - imageio==2.26.0
-- dm_control<1.0.21
-- mujoco<3.2.1
+- dm_control
+- mujoco
 - mlflow
 - av
 - coverage

.github/unittest/linux_libs/scripts_envpool/environment.yml

Lines changed: 2 additions & 2 deletions
@@ -18,6 +18,6 @@ dependencies:
 - expecttest
 - pyyaml
 - scipy
-- dm_control<1.0.21
-- mujoco<3.2.1
+- dm_control
+- mujoco
 - coverage

.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ dependencies:
 - scipy
 - hydra-core
 - dm_control -e git+https://github.com/deepmind/dm_control.git@c053360edea6170acfd9c8f65446703307d9d352#egg={dm_control}
-- mujoco<3.2.1
+- mujoco
 - patchelf
 - pyopengl==3.1.4
 - ray

.github/workflows/benchmarks.yml

Lines changed: 2 additions & 2 deletions
@@ -35,7 +35,7 @@ jobs:
       python3 setup.py develop
       python3 -m pip install pytest pytest-benchmark
       python3 -m pip install "gym[accept-rom-license,atari]"
-      python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
+      python3 -m pip install "dm_control" "mujoco"
       export TD_GET_DEFAULTS_TO_NONE=1
     - name: Run benchmarks
       run: |
@@ -97,7 +97,7 @@ jobs:
       python3 setup.py develop
       python3 -m pip install pytest pytest-benchmark
       python3 -m pip install "gym[accept-rom-license,atari]"
-      python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
+      python3 -m pip install "dm_control" "mujoco"
       export TD_GET_DEFAULTS_TO_NONE=1
     - name: check GPU presence
       run: |

.github/workflows/benchmarks_pr.yml

Lines changed: 2 additions & 2 deletions
@@ -34,7 +34,7 @@ jobs:
       python3 setup.py develop
       python3 -m pip install pytest pytest-benchmark
       python3 -m pip install "gym[accept-rom-license,atari]"
-      python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
+      python3 -m pip install "dm_control" "mujoco"
       export TD_GET_DEFAULTS_TO_NONE=1
     - name: Setup benchmarks
      run: |
@@ -108,7 +108,7 @@ jobs:
       python3 setup.py develop
       python3 -m pip install pytest pytest-benchmark
       python3 -m pip install "gym[accept-rom-license,atari]"
-      python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
+      python3 -m pip install "dm_control" "mujoco"
       export TD_GET_DEFAULTS_TO_NONE=1
     - name: check GPU presence
       run: |

.github/workflows/wheels-legacy.yml

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - release/*
+      - main

 concurrency:
   # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}.

docs/requirements.txt

Lines changed: 2 additions & 2 deletions
@@ -14,8 +14,8 @@ docutils
 sphinx_design

 torchvision
-dm_control<1.0.21
-mujoco<3.2.1
+dm_control
+mujoco
 atari-py
 ale-py
 gym[classic_control,accept-rom-license]

docs/source/reference/collectors.rst

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ worker) may also impact the memory management. The key parameters to control are
 :obj:`devices` which controls the execution devices (ie the device of the policy)
 and :obj:`storing_device` which will control the device where the environment and
 data are stored during a rollout. A good heuristic is usually to use the same device
-for storage and compute, which is the default behaviour when only the `devices` argument
+for storage and compute, which is the default behavior when only the `devices` argument
 is being passed.

 Besides those compute parameters, users may choose to configure the following parameters:
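To make the devices/storing_device split above concrete, a minimal sketch, assuming a TorchRL version where SyncDataCollector accepts singular device/storing_device arguments (the Pendulum env and LazyLinear policy are placeholders, and a CUDA device is assumed available):

import torch
from tensordict.nn import TensorDictModule
from torchrl.collectors import SyncDataCollector
from torchrl.envs import GymEnv

# Placeholder policy: reads "observation", writes "action".
policy = TensorDictModule(
    torch.nn.LazyLinear(1), in_keys=["observation"], out_keys=["action"]
)

collector = SyncDataCollector(
    lambda: GymEnv("Pendulum-v1"),
    policy,
    frames_per_batch=64,
    total_frames=256,
    device="cuda:0",       # execution device (policy and env steps)
    storing_device="cpu",  # device holding the rollout data
)
for data in collector:
    assert data.device == torch.device("cpu")  # stored on CPU as requested
    break
collector.shutdown()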

docs/source/reference/data.rst

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ using the following components:
 Storage choice is very influential on replay buffer sampling latency, especially
 in distributed reinforcement learning settings with larger data volumes.
 :class:`~torchrl.data.replay_buffers.storages.LazyMemmapStorage` is highly
-advised in distributed settings with shared storage due to the lower serialisation
+advised in distributed settings with shared storage due to the lower serialization
 cost of MemoryMappedTensors as well as the ability to specify file storage locations
 for improved node failure recovery.
 The following mean sampling latency improvements over using :class:`~torchrl.data.replay_buffers.ListStorage`
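As a hedged sketch of the storage advice in this hunk (the scratch_dir path, sizes, and tensordict keys are arbitrary):

import torch
from tensordict import TensorDict
from torchrl.data import TensorDictReplayBuffer
from torchrl.data.replay_buffers.storages import LazyMemmapStorage

# Tensors live in memory-mapped files on disk, so processes sharing the
# filesystem avoid per-item serialization; files also survive a node restart.
buffer = TensorDictReplayBuffer(
    storage=LazyMemmapStorage(max_size=100_000, scratch_dir="/tmp/rb"),
    batch_size=32,
)

data = TensorDict(
    {"obs": torch.randn(128, 4), "reward": torch.randn(128, 1)}, batch_size=[128]
)
buffer.extend(data)       # writes into the memmap files
sample = buffer.sample()  # 32 transitions read back from disk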

docs/source/reference/envs.rst

Lines changed: 5 additions & 5 deletions
@@ -318,7 +318,7 @@ have on an environment returning zeros after reset:

 We also offer the :class:`~.SerialEnv` class that enjoys the exact same API but is executed
 serially. This is mostly useful for testing purposes, when one wants to assess the
-behaviour of a :class:`~.ParallelEnv` without launching the subprocesses.
+behavior of a :class:`~.ParallelEnv` without launching the subprocesses.

 In addition to :class:`~.ParallelEnv`, which offers process-based parallelism, we also provide a way to create
 multithreaded environments with :obj:`~.MultiThreadedEnv`. This class uses `EnvPool <https://github.com/sail-sg/envpool>`_
@@ -499,7 +499,7 @@ current episode.
 To handle these cases, torchrl provides a :class:`~torchrl.envs.AutoResetTransform` that will copy the observations
 that result from the call to `step` to the next `reset` and skip the calls to `reset` during rollouts (in both
 :meth:`~torchrl.envs.EnvBase.rollout` and :class:`~torchrl.collectors.SyncDataCollector` iterations).
-This transform class also provides a fine-grained control over the behaviour to be adopted for the invalid observations,
+This transform class also provides a fine-grained control over the behavior to be adopted for the invalid observations,
 which can be masked with `"nan"` or any other values, or not masked at all.

 To tell torchrl that an environment is auto-resetting, it is sufficient to provide an ``auto_reset`` argument
@@ -755,10 +755,10 @@ registered buffers:
 >>> TransformedEnv(base_env, third_transform.clone()) # works

 On a single process or if the buffers are placed in shared memory, this will
-result in all the clone transforms to keep the same behaviour even if the
+result in all the clone transforms to keep the same behavior even if the
 buffers are changed in place (which is what will happen with the :class:`CatFrames`
 transform, for instance). In distributed settings, this may not hold and one
-should be careful about the expected behaviour of the cloned transforms in this
+should be careful about the expected behavior of the cloned transforms in this
 context.
 Finally, notice that indexing multiple transforms from a :class:`Compose` transform
 may also result in loss of parenthood for these transforms: the reason is that
@@ -1061,7 +1061,7 @@ the current gym backend or any of its modules:
 Another tool that comes in handy with gym and other external dependencies is
 the :class:`torchrl._utils.implement_for` class. Decorating a function
 with ``@implement_for`` will tell torchrl that, depending on the version
-indicated, a specific behaviour is to be expected. This allows us to easily
+indicated, a specific behavior is to be expected. This allows us to easily
 support multiple versions of gym without requiring any effort from the user side.
 For example, considering that our virtual environment has the v0.26.2 installed,
 the following function will return ``1`` when queried:
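To make the ``@implement_for`` paragraph concrete, a small sketch of the dispatch pattern it describes; the function name is made up and the version bounds are illustrative:

from torchrl._utils import implement_for

@implement_for("gym", "0.26", None)  # picked when gym >= 0.26 is installed
def gym_version_marker():
    return 1

@implement_for("gym", None, "0.26")  # picked when gym < 0.26 is installed
def gym_version_marker():  # noqa: F811
    return 2

print(gym_version_marker())  # -> 1 with gym v0.26.2, matching the doc text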

docs/source/reference/modules.rst

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ Exploration wrappers

 To efficiently explore the environment, TorchRL proposes a series of wrappers
 that will override the action sampled by the policy by a noisier version.
-Their behaviour is controlled by :func:`~torchrl.envs.utils.exploration_mode`:
+Their behavior is controlled by :func:`~torchrl.envs.utils.exploration_mode`:
 if the exploration is set to ``"random"``, the exploration is active. In all
 other cases, the action written in the tensordict is simply the network output.

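A tiny sketch of the switch these wrappers consult, using the exploration_mode/set_exploration_mode pair referenced in the hunk (later releases rename this machinery to exploration_type, so treat the exact names as version-dependent):

from torchrl.envs.utils import exploration_mode, set_exploration_mode

print(exploration_mode())      # current global mode
with set_exploration_mode("random"):
    # inside this block, exploration wrappers inject noise into actions
    print(exploration_mode())  # -> "random"
# outside the block, wrapped policies write the plain network output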

examples/distributed/replay_buffers/distributed_replay_buffer.py

Lines changed: 2 additions & 2 deletions
@@ -150,8 +150,8 @@ def _create_and_launch_data_collectors(self) -> None:

 class ReplayBufferNode(RemoteTensorDictReplayBuffer):
     """Experience replay buffer node that is capable of accepting remote connections. Being a `RemoteTensorDictReplayBuffer`
-    means all of it's public methods are remotely invokable using `torch.rpc`.
-    Using a LazyMemmapStorage is highly advised in distributed settings with shared storage due to the lower serialisation
+    means all of its public methods are remotely invokable using `torch.rpc`.
+    Using a LazyMemmapStorage is highly advised in distributed settings with shared storage due to the lower serialization
     cost of MemoryMappedTensors as well as the ability to specify file storage locations which can improve ability to recover from node failures.

     Args:

setup.py

Lines changed: 1 addition & 1 deletion
@@ -191,7 +191,7 @@ def _main(argv):
     # tag = _run_cmd(["git", "describe", "--tags", "--exact-match", "@"])

     this_directory = Path(__file__).parent
-    long_description = (this_directory / "README.md").read_text()
+    long_description = (this_directory / "README.md").read_text(encoding="utf8")
     sys.argv = [sys.argv[0]] + unknown

     extra_requires = {
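The read_text change above matters because pathlib otherwise decodes with the platform's locale encoding, so non-ASCII characters in README.md can raise UnicodeDecodeError, typically on Windows. The safe pattern, sketched:

from pathlib import Path

# An explicit encoding makes the read deterministic across platforms;
# without it, the result depends on locale.getpreferredencoding(False).
text = Path("README.md").read_text(encoding="utf8")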

sota-implementations/cql/cql_offline.py

Lines changed: 3 additions & 6 deletions
@@ -58,14 +58,14 @@ def main(cfg: "DictConfig"):  # noqa: F821
     device = "cpu"
     device = torch.device(device)

+    # Create replay buffer
+    replay_buffer = make_offline_replay_buffer(cfg.replay_buffer)
+
     # Create env
     train_env, eval_env = make_environment(
         cfg, train_num_envs=1, eval_num_envs=cfg.logger.eval_envs, logger=logger
     )

-    # Create replay buffer
-    replay_buffer = make_offline_replay_buffer(cfg.replay_buffer)
-
     # Create agent
     model = make_cql_model(cfg, train_env, eval_env, device)
     del train_env
@@ -107,9 +107,6 @@ def main(cfg: "DictConfig"):  # noqa: F821

         q_loss = q_loss + cql_loss

-        alpha_loss = loss_vals["loss_alpha"]
-        alpha_prime_loss = loss_vals["loss_alpha_prime"]
-
         # update model
         alpha_loss = loss_vals["loss_alpha"]
         alpha_prime_loss = loss_vals["loss_alpha_prime"]

sota-implementations/redq/redq.py

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         use_env_creator=False,
     )()
     if isinstance(create_env_fn, ParallelEnv):
-        raise NotImplementedError("This behaviour is deprecated")
+        raise NotImplementedError("This behavior is deprecated")
     elif isinstance(create_env_fn, EnvCreator):
         recorder.transform[1:].load_state_dict(
             get_norm_state_dict(create_env_fn()), strict=False

test/_utils_internal.py

Lines changed: 2 additions & 2 deletions
@@ -56,7 +56,7 @@ def HALFCHEETAH_VERSIONED():

 def PONG_VERSIONED():
     # load gym
-    # Gymnasium says that the ale_py behaviour changes from 1.0
+    # Gymnasium says that the ale_py behavior changes from 1.0
     # but with python 3.12 it is already the case with 0.29.1
     try:
         import ale_py  # noqa
@@ -70,7 +70,7 @@ def PONG_VERSIONED():

 def BREAKOUT_VERSIONED():
     # load gym
-    # Gymnasium says that the ale_py behaviour changes from 1.0
+    # Gymnasium says that the ale_py behavior changes from 1.0
     # but with python 3.12 it is already the case with 0.29.1
     try:
         import ale_py  # noqa
