Add Bernoulli output layer

Isaac Poulton · Isaac Poulton · commit 9eda405a02d9 · 2019-04-24T19:02:20.000+09:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,10 +37,12 @@ add_custom_target(
 
 # Dependencies
 ## PyTorch
-find_package(Torch REQUIRED)
-if (TORCH_CXX_FLAGS)
-    set(CMAKE_CXX_FLAGS ${TORCH_CXX_FLAGS})
-endif()
+if (NOT TORCH_FOUND)  # Only look up Torch when it has not been found already
+    find_package(Torch REQUIRED)
+    if (TORCH_CXX_FLAGS)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")  # Append: keep flags that were already set
+    endif()
+endif (NOT TORCH_FOUND)
 
 # Define targets
 add_library(cpprl STATIC "")
diff --git a/include/cpprl/model/output_layers.h b/include/cpprl/model/output_layers.h
@@ -20,6 +20,17 @@ class OutputLayer : public nn::Module
 
 inline OutputLayer::~OutputLayer() {}
 
+class BernoulliOutput : public OutputLayer  // Output layer that produces a Bernoulli distribution
+{
+  private:
+    nn::Linear linear;  // Linear layer sized from the constructor arguments
+
+  public:
+    BernoulliOutput(unsigned int num_inputs, unsigned int num_outputs);  // Input / output widths of the linear layer
+
+    std::unique_ptr<Distribution> forward(torch::Tensor x);  // Returned distribution is owned by the caller
+};
+
 class CategoricalOutput : public OutputLayer
 {
   private:
diff --git a/src/model/output_layers.cpp b/src/model/output_layers.cpp
@@ -5,6 +5,7 @@
 #include "cpprl/model/output_layers.h"
 #include "cpprl/model/model_utils.h"
 #include "cpprl/distributions/distribution.h"
+#include "cpprl/distributions/bernoulli.h"
 #include "cpprl/distributions/categorical.h"
 #include "cpprl/distributions/normal.h"
 #include "third_party/doctest.h"
@@ -13,6 +14,20 @@ using namespace torch;
 
 namespace cpprl
 {
+BernoulliOutput::BernoulliOutput(unsigned int num_inputs,
+                                 unsigned int num_outputs)
+    : linear(num_inputs, num_outputs)  // Linear layer: num_inputs -> num_outputs
+{
+    register_module("linear", linear);  // Register the layer as a submodule named "linear"
+    init_weights(linear->named_parameters(), 0.01, 0);  // NOTE(review): 0.01 / 0 are presumably weight gain and bias value - confirm in model_utils.h
+}
+
+std::unique_ptr<Distribution> BernoulliOutput::forward(torch::Tensor x)  // Builds a Bernoulli distribution from x
+{
+    x = linear(x);  // Overwrite x with the linear layer's output
+    return std::make_unique<Bernoulli>(nullptr, &x);  // NOTE(review): first arg left null, second is presumably logits; &x points at a local - confirm Bernoulli copies the tensor
+}
+
 CategoricalOutput::CategoricalOutput(unsigned int num_inputs,
                                      unsigned int num_outputs)
     : linear(num_inputs, num_outputs)
@@ -43,6 +58,24 @@ std::unique_ptr<Distribution> NormalOutput::forward(torch::Tensor x)
     return std::make_unique<Normal>(loc, scale);
 }
 
+TEST_CASE("BernoulliOutput")  // doctest case covering the Bernoulli output layer
+{
+    auto output_layer = BernoulliOutput(3, 5);  // 3 inputs, 5 outputs
+
+    SUBCASE("Output distribution has correct output shape")
+    {
+        float input_array[2][3] = {{0, 1, 2}, {3, 4, 5}};  // 2 rows of 3 input values
+        auto input_tensor = torch::from_blob(input_array,  // Wraps the array's memory; input_array must outlive the tensor
+                                             {2, 3},
+                                             TensorOptions(torch::kFloat));
+        auto dist = output_layer.forward(input_tensor);
+
+        auto output = dist->sample();
+
+        CHECK(output.sizes().vec() == std::vector<int64_t>{2, 5});  // Sample shape: 2 input rows x 5 outputs
+    }
+}
+
 TEST_CASE("CategoricalOutput")
 {
     auto output_layer = CategoricalOutput(3, 5);
diff --git a/src/model/policy.cpp b/src/model/policy.cpp
@@ -29,8 +29,8 @@ PolicyImpl::PolicyImpl(ActionSpace action_space, std::shared_ptr<NNBase> base)
     }
     else if (action_space.type == "MultiBinary")
     {
-        // num_outputs = action_space.shape[0];
-        // self.dist = Bernoulli(self.base.output_size, num_outputs)
+        output_layer = std::make_shared<BernoulliOutput>(
+            base->get_output_size(), num_outputs);
     }
     else
     {

Original file line number	Diff line number	Diff line change
`@@ -29,8 +29,8 @@ PolicyImpl::PolicyImpl(ActionSpace action_space, std::shared_ptr<NNBase> base)`
`29`	`29`	`}`
`30`	`30`	`else if (action_space.type == "MultiBinary")`
`31`	`31`	`{`
`32`		`- // num_outputs = action_space.shape[0];`
`33`		`- // self.dist = Bernoulli(self.base.output_size, num_outputs)`
	`32`	`+ output_layer = std::make_shared<BernoulliOutput>(`
	`33`	`+ base->get_output_size(), num_outputs);`
`34`	`34`	`}`
`35`	`35`	`else`
`36`	`36`	`{`