Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RLlib] Add more detailed Documentation on Model building API #13261

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
61b1423
WIP.
sven1977 Dec 29, 2020
0b1eb57
WIP.
sven1977 Dec 29, 2020
ce180fd
wip
sven1977 Dec 29, 2020
c6cfe3b
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 2, 2021
1254e16
WIP.
sven1977 Jan 4, 2021
4136afe
WIP.
sven1977 Jan 4, 2021
6af7eb9
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 5, 2021
4e61f5d
WIP.
sven1977 Jan 5, 2021
ec2d010
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 5, 2021
98521e3
WIP.
sven1977 Jan 6, 2021
01fa66f
WIP.
sven1977 Jan 6, 2021
8b5d280
WIP.
sven1977 Jan 6, 2021
51ef2c6
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 7, 2021
7335055
Merge branch 'master' of https://github.com/ray-project/ray
sven1977 Jan 7, 2021
5819e2f
WIP.
sven1977 Jan 7, 2021
54d3703
WIP.
sven1977 Jan 7, 2021
c372a54
WIP.
sven1977 Jan 7, 2021
0cd09c1
Merge branch 'documentation_model_building_prep_01' into documentatio…
sven1977 Jan 7, 2021
389df5b
WIP.
sven1977 Jan 7, 2021
348b012
WIP.
sven1977 Jan 7, 2021
98706bd
WIP.
sven1977 Jan 7, 2021
b190d8e
Merge branch 'documentation_model_building_prep_01' into documentatio…
sven1977 Jan 7, 2021
1b126a9
WIP.
sven1977 Jan 7, 2021
8536fe7
Fixes and LINT.
sven1977 Jan 8, 2021
2d836ac
Merge branch 'documentation_model_building_prep_01' into documentatio…
sven1977 Jan 8, 2021
241f17d
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 8, 2021
379a791
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 8, 2021
0b7bc77
Fix.
sven1977 Jan 8, 2021
5310292
Merge branch 'documentation_model_building_prep_01' into documentatio…
sven1977 Jan 8, 2021
60df06c
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 8, 2021
b9e59f7
WIP.
sven1977 Jan 8, 2021
cda9fff
WIP.
sven1977 Jan 9, 2021
5c1e044
Merge branch 'master' of https://github.com/ray-project/ray into docu…
sven1977 Jan 9, 2021
fadf6d4
LINT.
sven1977 Jan 9, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP.
  • Loading branch information
sven1977 committed Jan 7, 2021
commit 5819e2fd798a816bbfe4064ab54c01d6aab212b7
104 changes: 104 additions & 0 deletions rllib/examples/custom_model_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Example showing how to wrap RLlib default models with a custom model API.

Builds two models via ``ModelCatalog.get_model_v2`` using the
``model_interface`` arg:
1) a dueling Q-head model (``get_q_values`` on top of ``forward``), and
2) a continuous-action Q-model (``get_single_q_value`` from obs + action).
Then runs a forward pass through each and sanity-checks the output shapes.
"""
import argparse
from gym.spaces import Box, Discrete
import numpy as np

from ray.rllib.examples.models.custom_model_api import DuelingQModel, \
    TorchDuelingQModel, ContActionQModel, TorchContActionQModel
from ray.rllib.models.catalog import ModelCatalog, MODEL_DEFAULTS
from ray.rllib.utils.framework import try_import_tf, try_import_torch

tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()

parser = argparse.ArgumentParser()
parser.add_argument(
    "--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf")


if __name__ == "__main__":
    args = parser.parse_args()

    # Test API wrapper for dueling Q-head.

    obs_space = Box(-1.0, 1.0, (3, ))
    action_space = Discrete(3)

    # Run in eager mode for value checking and debugging.
    # Only needed (and only safe) for the tf frameworks; with
    # `--framework=torch`, tf may not even be installed.
    if args.framework != "torch":
        tf1.enable_eager_execution()

    # __sphinx_doc_model_construct_begin__
    my_dueling_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=action_space.n,
        model_config=MODEL_DEFAULTS,
        framework=args.framework,
        # Providing the `model_interface` arg will make the factory
        # wrap the chosen default model with our new model API class
        # (DuelingQModel). This way, both `forward` and `get_q_values`
        # are available in the returned class.
        model_interface=DuelingQModel if args.framework != "torch"
        else TorchDuelingQModel,
        name="dueling_q_model",
    )
    # __sphinx_doc_model_construct_end__

    batch_size = 10
    input_ = np.array([obs_space.sample() for _ in range(batch_size)])
    # Note that for PyTorch, you will have to provide torch tensors here.
    if args.framework == "torch":
        input_ = torch.from_numpy(input_)

    input_dict = {
        "obs": input_,
        "is_training": False,
    }
    out, state_outs = my_dueling_model(input_dict=input_dict)
    # 256 = default last hidden layer size of the wrapped FCNet.
    assert out.shape == (10, 256)
    # Pass `out` into `get_q_values`
    q_values = my_dueling_model.get_q_values(out)
    assert q_values.shape == (10, action_space.n)

    # Test API wrapper for single value Q-head from obs/action input.

    obs_space = Box(-1.0, 1.0, (3, ))
    # NOTE: was `Box(-1.0, -1.0, (2, ))` (low == high == -1.0) — a typo
    # that made every sampled action the constant -1.0.
    action_space = Box(-1.0, 1.0, (2, ))

    # Use a distinct marker pair here: Sphinx `start-after`/`end-before`
    # always match the first occurrence, so reusing the tags above would
    # make this snippet unaddressable from the docs.
    # __sphinx_doc_model_construct_2_begin__
    my_cont_action_q_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=2,
        model_config=MODEL_DEFAULTS,
        framework=args.framework,
        # Providing the `model_interface` arg will make the factory
        # wrap the chosen default model with our new model API class
        # (ContActionQModel). This way, both `forward` and
        # `get_single_q_value` are available in the returned class.
        model_interface=ContActionQModel if args.framework != "torch"
        else TorchContActionQModel,
        name="cont_action_q_model",
    )
    # __sphinx_doc_model_construct_2_end__

    batch_size = 10
    input_ = np.array([obs_space.sample() for _ in range(batch_size)])

    # Note that for PyTorch, you will have to provide torch tensors here.
    if args.framework == "torch":
        input_ = torch.from_numpy(input_)

    input_dict = {
        "obs": input_,
        "is_training": False,
    }
    out, state_outs = my_cont_action_q_model(input_dict=input_dict)
    assert out.shape == (10, 256)
    # Pass `out` and an action into the model's Q-head.
    action = np.array([action_space.sample() for _ in range(batch_size)])
    if args.framework == "torch":
        action = torch.from_numpy(action)

    q_value = my_cont_action_q_model.get_single_q_value(out, action)
    assert q_value.shape == (10, 1)
41 changes: 0 additions & 41 deletions rllib/examples/custom_model_api_tf.py

This file was deleted.

45 changes: 0 additions & 45 deletions rllib/examples/custom_model_api_torch.py

This file was deleted.

20 changes: 16 additions & 4 deletions rllib/examples/models/custom_model_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.torch.fcnet import TorchFullyConnectedNetwork
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as \
TorchFullyConnectedNetwork
from ray.rllib.models.torch.misc import SlimFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.framework import try_import_tf, try_import_torch
Expand Down Expand Up @@ -102,15 +103,20 @@ def __init__(self, obs_space, action_space, num_outputs, model_config,

# Nest an RLlib FullyConnectedNetwork (torch or tf) into this one here
# to be used for Q-value calculation.
# Use the current value of self.num_outputs, which is the wrapped
# model's output layer size.
combined_space = Box(
-1.0, 1.0, (self.num_outputs + action_space.shape[0], ))
self.q_head = FullyConnectedNetwork(
combined_space, action_space, 1, model_config, "q_head")

# Missing here: Probably still have to provide action output layer
# and value layer and make sure self.num_outputs is correctly set.

def get_single_q_value(self, underlying_output, action):
# Calculate the q-value after concating the underlying output with
# the given action.
input_ = torch.cat([underlying_output, action], dim=-1)
input_ = tf.concat([underlying_output, action], axis=-1)
# Construct a simple input_dict (needed for self.q_head as it's an
# RLlib ModelV2).
input_dict = {"obs": input_}
Expand All @@ -120,30 +126,36 @@ def get_single_q_value(self, underlying_output, action):


# __sphinx_doc_model_api_torch_start__
class TorchContActionQModel(TorchModelV2): # or: TFModelV2
class TorchContActionQModel(TorchModelV2):
"""A simple, q-value-from-cont-action model (for e.g. SAC type algos)."""

def __init__(self, obs_space, action_space, num_outputs, model_config,
name):
nn.Module.__init__(self)
# Pass num_outputs=None into super constructor (so that no action/
# logits output layer is built).
# Alternatively, you can pass in num_outputs=[last layer size of
# config[model][fcnet_hiddens]] AND set no_last_linear=True, but
# this seems more tedious as you will have to explain users of this
# class that num_outputs is NOT the size of your Q-output layer.
super(ContActionQModel, self).__init__(
super(TorchContActionQModel, self).__init__(
obs_space, action_space, None, model_config, name)

# Now: self.num_outputs contains the last layer's size, which
# we can use to construct the single q-value computing head.

# Nest an RLlib FullyConnectedNetwork (torch or tf) into this one here
# to be used for Q-value calculation.
# Use the current value of self.num_outputs, which is the wrapped
# model's output layer size.
combined_space = Box(
-1.0, 1.0, (self.num_outputs + action_space.shape[0], ))
self.q_head = TorchFullyConnectedNetwork(
combined_space, action_space, 1, model_config, "q_head")

# Missing here: Probably still have to provide action output layer
# and value layer and make sure self.num_outputs is correctly set.

def get_single_q_value(self, underlying_output, action):
# Calculate the q-value after concating the underlying output with
# the given action.
Expand Down