Skip to content

Commit 5d168b4

Browse files
authored
Merge pull request #1 from vmoens/tutorials_py2
Bug fixes and syntax changes for: - docs/source/config.py - docs/source/content_generation.py - tutorials/sphinx-tutorials/multi_task.py - tutorials/sphinx-tutorials/tensordict.py - tutorials/sphinx-tutorials/tensordict_module.py - tutorials/sphinx-tutorials/torchrl_demo.py
2 parents 05f61b8 + 7aeee38 commit 5d168b4

File tree

6 files changed

+294
-159
lines changed

6 files changed

+294
-159
lines changed

docs/source/conf.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
# -- Project information -----------------------------------------------------
2424
import os.path
2525
import sys
26-
from pathlib import Path
2726
import warnings
2827

2928
import pytorch_sphinx_theme
@@ -75,8 +74,8 @@
7574
"gallery_dirs": "tutorials", # path to where to save gallery generated output
7675
"backreferences_dir": "gen_modules/backreferences",
7776
"doc_module": ("torchrl",),
78-
"filename_pattern": "reference/generated/tutorials/", # files to parse
79-
"notebook_images": "reference/generated/tutorials/media/", # images to parse
77+
"filename_pattern": "reference/generated/tutorials/", # files to parse
78+
"notebook_images": "reference/generated/tutorials/media/", # images to parse
8079
}
8180

8281
napoleon_use_ivar = True
@@ -162,7 +161,10 @@
162161
# -- Generate knowledge base references -----------------------------------
163162
current_path = os.path.dirname(os.path.realpath(__file__))
164163
sys.path.append(current_path)
165-
from content_generation import generate_knowledge_base_references, generate_tutorial_references
164+
from content_generation import (
165+
generate_knowledge_base_references,
166+
generate_tutorial_references,
167+
)
166168

167169
generate_knowledge_base_references("../../knowledge_base")
168170
generate_tutorial_references("../../tutorials/sphinx-tutorials/", "tutorial")

docs/source/content_generation.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
2-
from pathlib import Path
32
import shutil
3+
from pathlib import Path
44
from typing import List
55

66
FILE_DIR = os.path.dirname(__file__)
@@ -78,9 +78,11 @@ def generate_tutorial_references(tutorial_path: str, file_type: str) -> None:
7878
Path(target_path).mkdir(parents=True, exist_ok=True)
7979

8080
# Iterate tutorial files and copy
81-
file_paths = [os.path.join(tutorial_path, f)
82-
for f in os.listdir(tutorial_path)
83-
if f.endswith((".py", ".rst", ".png"))]
81+
file_paths = [
82+
os.path.join(tutorial_path, f)
83+
for f in os.listdir(tutorial_path)
84+
if f.endswith((".py", ".rst", ".png"))
85+
]
8486

8587
for file_path in file_paths:
8688
shutil.copyfile(file_path, os.path.join(target_path, Path(file_path).name))

tutorials/sphinx-tutorials/multi_task.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
================================================
55
This tutorial details how multi-task policies and batched environments can be used.
66
"""
7+
import torch
8+
from torch import nn
9+
710
##############################################################################
811
# At the end of this tutorial, you will be capable of writing policies that
912
# can compute actions in diverse settings using a distinct set of weights.
@@ -12,8 +15,6 @@
1215
from torchrl.envs import TransformedEnv, CatTensors, Compose, DoubleToFloat, ParallelEnv
1316
from torchrl.envs.libs.dm_control import DMControlEnv
1417
from torchrl.modules import TensorDictModule, TensorDictSequential, MLP
15-
from torch import nn
16-
import torch
1718

1819
###############################################################################
1920
# We design two environments, one humanoid that must complete the stand task
@@ -26,8 +27,11 @@
2627
Compose(
2728
CatTensors(env1_obs_keys, "next_observation_stand", del_keys=False),
2829
CatTensors(env1_obs_keys, "next_observation"),
29-
DoubleToFloat(keys_in=["next_observation_stand", "next_observation"], keys_inv_in=["action"]),
30-
)
30+
DoubleToFloat(
31+
in_keys=["next_observation_stand", "next_observation"],
32+
in_keys_inv=["action"],
33+
),
34+
),
3135
)
3236
env2 = DMControlEnv("humanoid", "walk")
3337
env2_obs_keys = list(env2.observation_spec.keys())
@@ -36,8 +40,11 @@
3640
Compose(
3741
CatTensors(env2_obs_keys, "next_observation_walk", del_keys=False),
3842
CatTensors(env2_obs_keys, "next_observation"),
39-
DoubleToFloat(keys_in=["next_observation_walk", "next_observation"], keys_inv_in=["action"]),
40-
)
43+
DoubleToFloat(
44+
in_keys=["next_observation_walk", "next_observation"],
45+
in_keys_inv=["action"],
46+
),
47+
),
4148
)
4249

4350
###############################################################################
@@ -66,10 +73,22 @@
6673

6774
###############################################################################
6875

69-
policy_common = TensorDictModule(nn.Linear(67, 64), in_keys=["observation"], out_keys=["hidden"])
70-
policy_stand = TensorDictModule(MLP(67 + 64, action_dim, depth=2), in_keys=["observation_stand", "hidden"], out_keys=["action"])
71-
policy_walk = TensorDictModule(MLP(67 + 64, action_dim, depth=2), in_keys=["observation_walk", "hidden"], out_keys=["action"])
72-
seq = TensorDictSequential(policy_common, policy_stand, policy_walk, partial_tolerant=True)
76+
policy_common = TensorDictModule(
77+
nn.Linear(67, 64), in_keys=["observation"], out_keys=["hidden"]
78+
)
79+
policy_stand = TensorDictModule(
80+
MLP(67 + 64, action_dim, depth=2),
81+
in_keys=["observation_stand", "hidden"],
82+
out_keys=["action"],
83+
)
84+
policy_walk = TensorDictModule(
85+
MLP(67 + 64, action_dim, depth=2),
86+
in_keys=["observation_walk", "hidden"],
87+
out_keys=["action"],
88+
)
89+
seq = TensorDictSequential(
90+
policy_common, policy_stand, policy_walk, partial_tolerant=True
91+
)
7392

7493
###############################################################################
7594
# Let's check that our sequence outputs actions for a single env (stand).
@@ -101,22 +120,35 @@
101120
# a single task has to be performed. If a list of functions is provided, then
102121
# it will assume that we are in a multi-task setting.
103122

104-
env1_maker = lambda: TransformedEnv(
105-
DMControlEnv("humanoid", "stand"),
106-
Compose(
107-
CatTensors(env1_obs_keys, "next_observation_stand", del_keys=False),
108-
CatTensors(env1_obs_keys, "next_observation"),
109-
DoubleToFloat(keys_in=["next_observation_stand", "next_observation"], keys_inv_in=["action"]),
123+
124+
def env1_maker():
125+
return TransformedEnv(
126+
DMControlEnv("humanoid", "stand"),
127+
Compose(
128+
CatTensors(env1_obs_keys, "next_observation_stand", del_keys=False),
129+
CatTensors(env1_obs_keys, "next_observation"),
130+
DoubleToFloat(
131+
in_keys=["next_observation_stand", "next_observation"],
132+
in_keys_inv=["action"],
133+
),
134+
),
110135
)
111-
)
112-
env2_maker = lambda: TransformedEnv(
113-
DMControlEnv("humanoid", "walk"),
114-
Compose(
115-
CatTensors(env2_obs_keys, "next_observation_walk", del_keys=False),
116-
CatTensors(env2_obs_keys, "next_observation"),
117-
DoubleToFloat(keys_in=["next_observation_walk", "next_observation"], keys_inv_in=["action"]),
136+
137+
138+
def env2_maker():
139+
return TransformedEnv(
140+
DMControlEnv("humanoid", "walk"),
141+
Compose(
142+
CatTensors(env2_obs_keys, "next_observation_walk", del_keys=False),
143+
CatTensors(env2_obs_keys, "next_observation"),
144+
DoubleToFloat(
145+
in_keys=["next_observation_walk", "next_observation"],
146+
in_keys_inv=["action"],
147+
),
148+
),
118149
)
119-
)
150+
151+
120152
env = ParallelEnv(2, [env1_maker, env2_maker])
121153
assert not env._single_task
122154

@@ -148,8 +180,8 @@
148180

149181
###############################################################################
150182

151-
td_rollout[:, 0] # tensordict of the first step: only the common keys are shown
183+
td_rollout[:, 0] # tensordict of the first step: only the common keys are shown
152184

153185
###############################################################################
154186

155-
td_rollout[0] # tensordict of the first env: the stand obs is present
187+
td_rollout[0] # tensordict of the first env: the stand obs is present

tutorials/sphinx-tutorials/tensordict.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,19 @@
8181
# However to achieve this you would need to write a complicated collate
8282
# function that make sure that every modality is aggregated properly.
8383

84+
8485
def collate_dict_fn(dict_list):
8586
final_dict = {}
8687
for key in dict_list[0].keys():
87-
final_dict[key]= []
88+
final_dict[key] = []
8889
for single_dict in dict_list:
8990
final_dict[key].append(single_dict[key])
9091
final_dict[key] = torch.stack(final_dict[key], dim=0)
9192
return final_dict
9293

94+
95+
import torch
96+
9397
###############################################################################
9498
# dataloader = Dataloader(DictDataset(), collate_fn = collate_dict_fn)
9599
#
@@ -120,11 +124,9 @@ def collate_dict_fn(dict_list):
120124
from torchrl.data import TensorDict
121125
from torchrl.data.tensordict.tensordict import (
122126
UnsqueezedTensorDict,
123-
ViewedTensorDict,
127+
_ViewedTensorDict,
124128
PermutedTensorDict,
125-
LazyStackedTensorDict,
126129
)
127-
import torch
128130

129131
###############################################################################
130132
# TensorDict is a Datastructure indexed by either keys or numerical indices.
@@ -147,7 +149,7 @@ def collate_dict_fn(dict_list):
147149
# does not work
148150
try:
149151
tensordict = TensorDict({"a": a, "b": b}, batch_size=[3, 4, 5])
150-
except:
152+
except RuntimeError:
151153
print("caramba!")
152154

153155
###############################################################################
@@ -158,10 +160,10 @@ def collate_dict_fn(dict_list):
158160
a = torch.zeros(3, 4)
159161
b = TensorDict(
160162
{
161-
"c": torch.zeros(3, 4, 5, dtype=torch.int32),
162-
"d": torch.zeros(3, 4, 5, 6, dtype=torch.float32)
163+
"c": torch.zeros(3, 4, 5, dtype=torch.int32),
164+
"d": torch.zeros(3, 4, 5, 6, dtype=torch.float32),
163165
},
164-
batch_size=[3, 4, 5]
166+
batch_size=[3, 4, 5],
165167
)
166168
tensordict = TensorDict({"a": a, "b": b}, batch_size=[3, 4])
167169
print(tensordict)
@@ -233,7 +235,7 @@ def collate_dict_fn(dict_list):
233235
# The ``update`` method can be used to update a TensorDict with another one
234236
# (or with a dict):
235237

236-
tensordict.update({"a": torch.ones((3, 4, 5)), "d": 2*torch.ones((3, 4, 2))})
238+
tensordict.update({"a": torch.ones((3, 4, 5)), "d": 2 * torch.ones((3, 4, 2))})
237239
# Also works with tensordict.update(TensorDict({"a":torch.ones((3, 4, 5)),
238240
# "c":torch.ones((3, 4, 2))}, batch_size=[3,4]))
239241
print(f"a is now equal to 1: {(tensordict['a'] == 1).all()}")
@@ -262,7 +264,9 @@ def collate_dict_fn(dict_list):
262264
# but it must be shared across tensors. Indeed, you cannot have items that don't
263265
# share the batch size inside the same TensorDict:
264266

265-
tensordict = TensorDict({"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4])
267+
tensordict = TensorDict(
268+
{"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4]
269+
)
266270
print(f"Our TensorDict is of size {tensordict.shape}")
267271

268272
###############################################################################
@@ -302,8 +306,10 @@ def collate_dict_fn(dict_list):
302306
tensordict = TensorDict({}, [10])
303307
for i in range(2):
304308
tensordict[i] = TensorDict({"a": torch.randn(3, 4)}, [])
305-
assert (tensordict[9]["a"] == torch.zeros((3,4))).all()
306-
tensordict = TensorDict({"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4])
309+
assert (tensordict[9]["a"] == torch.zeros((3, 4))).all()
310+
tensordict = TensorDict(
311+
{"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4]
312+
)
307313

308314
###############################################################################
309315
# Devices
@@ -327,7 +333,9 @@ def collate_dict_fn(dict_list):
327333
# than the original item.
328334

329335
tensordict_clone = tensordict.clone()
330-
print(f"Content is identical ({(tensordict['a'] == tensordict_clone['a']).all()}) but duplicated ({tensordict['a'] is not tensordict_clone['a']})")
336+
print(
337+
f"Content is identical ({(tensordict['a'] == tensordict_clone['a']).all()}) but duplicated ({tensordict['a'] is not tensordict_clone['a']})"
338+
)
331339

332340
###############################################################################
333341
# **Slicing and Indexing**
@@ -356,7 +364,9 @@ def collate_dict_fn(dict_list):
356364
# to the original tensordict as well as the desired index such that tensor
357365
# modifications can be achieved easily.
358366

359-
tensordict = TensorDict({"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4])
367+
tensordict = TensorDict(
368+
{"a": torch.zeros(3, 4, 5), "b": torch.zeros(3, 4)}, batch_size=[3, 4]
369+
)
360370
# a SubTensorDict keeps track of the original one: it does not create a copy in memory of the original data
361371
subtd = tensordict.get_sub_tensordict((slice(None), torch.tensor([1, 3])))
362372
tensordict.fill_("a", -1)
@@ -422,10 +432,10 @@ def collate_dict_fn(dict_list):
422432
###############################################################################
423433
# **View**
424434
#
425-
# Support for the view operation returning a ``ViewedTensorDict``.
435+
# Support for the view operation returning a ``_ViewedTensorDict``.
426436
# Use ``to_tensordict`` to comeback to retrieve TensorDict.
427437

428-
assert type(tensordict.view(-1)) == ViewedTensorDict
438+
assert type(tensordict.view(-1)) == _ViewedTensorDict
429439
assert tensordict.view(-1).shape[0] == 12
430440

431441
###############################################################################
@@ -434,8 +444,8 @@ def collate_dict_fn(dict_list):
434444
# We can permute the dims of ``TensorDict``. Permute is a Lazy operation that
435445
# returns PermutedTensorDict. Use ``to_tensordict`` to convert to ``TensorDict``.
436446

437-
assert type(tensordict.permute(1,0)) == PermutedTensorDict
438-
assert tensordict.permute(1,0).batch_size == torch.Size([4, 3])
447+
assert type(tensordict.permute(1, 0)) == PermutedTensorDict
448+
assert tensordict.permute(1, 0).batch_size == torch.Size([4, 3])
439449

440450
###############################################################################
441451
# **Reshape**

0 commit comments

Comments (0)