Skip to content
This repository was archived by the owner on Dec 28, 2023. It is now read-only.

Commit 6460f0a

Browse files
author
Isaac Poulton
authored
Add Bernoulli distribution (#10)
* Add Bernoulli distribution * Refactor Distribution::extended_shape
1 parent e0f6e67 commit 6460f0a

File tree

10 files changed

+235
-48
lines changed

10 files changed

+235
-48
lines changed

example/gym_client.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ using namespace cpprl;
1515

1616
// Algorithm hyperparameters
1717
const std::string algorithm = "PPO";
18-
const int batch_size = 256;
18+
const int batch_size = 2048;
1919
const float clip_param = 0.2;
2020
const float discount_factor = 0.99;
21-
const float entropy_coef = 0.0;
21+
const float entropy_coef = 0.001;
2222
const float gae = 0.95;
23-
const float learning_rate = 3e-4;
23+
const float learning_rate = 2.5e-4;
2424
const int log_interval = 1;
25-
const int max_frames = 1e+7;
25+
const int max_frames = 10e+7;
2626
const int num_epoch = 10;
27-
const int num_mini_batch = 8;
27+
const int num_mini_batch = 32;
2828
const int reward_average_window_size = 10;
2929
const bool use_gae = true;
3030
const bool use_lr_decay = true;
@@ -33,12 +33,12 @@ const float value_loss_coef = 0.5;
3333
// Environment hyperparameters
3434
const float env_gamma = discount_factor; // Set to -1 to disable
3535
const std::string env_name = "BipedalWalkerHardcore-v2";
36-
const int num_envs = 8;
37-
const float render_reward_threshold = 250;
36+
const int num_envs = 16;
37+
const float render_reward_threshold = 160;
3838

3939
// Model hyperparameters
4040
const int hidden_size = 64;
41-
const bool recurrent = true;
41+
const bool recurrent = false;
4242
const bool use_cuda = false;
4343

4444
std::vector<float> flatten_vector(std::vector<float> const &input)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
#pragma once

#include <c10/util/ArrayRef.h>
#include <torch/torch.h>

#include "cpprl/distributions/distribution.h"

namespace cpprl
{
// Bernoulli distribution parameterized by either per-element probabilities
// or logits (exactly one of the two must be supplied), mirroring PyTorch's
// torch.distributions.Bernoulli.
class Bernoulli : public Distribution
{
  private:
    // probs and logits are kept in sync: whichever one the caller supplies,
    // the other is derived from it in the constructor. param aliases the
    // caller-supplied parameter tensor.
    torch::Tensor probs, logits, param;

  public:
    // Exactly one of probs/logits must be non-null; passing both or neither
    // throws. The pointees are shallow-copied tensors.
    Bernoulli(const torch::Tensor *probs, const torch::Tensor *logits);

    // Per-element entropy; same shape as the parameter tensor.
    torch::Tensor entropy();
    // Element-wise log-probability of value (broadcast against logits).
    torch::Tensor log_prob(torch::Tensor value);
    // Draws 0/1 samples with shape sample_shape + batch_shape.
    torch::Tensor sample(c10::ArrayRef<int64_t> sample_shape = {});

    // const-qualified: accessors only read the stored tensors.
    inline torch::Tensor get_logits() const { return logits; }
    inline torch::Tensor get_probs() const { return probs; }
};
}

include/cpprl/distributions/categorical.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,9 @@ namespace cpprl
1010
class Categorical : public Distribution
1111
{
1212
private:
13-
torch::Tensor probs;
14-
torch::Tensor logits;
15-
std::vector<int64_t> batch_shape;
16-
std::vector<int64_t> event_shape;
17-
torch::Tensor param;
13+
torch::Tensor probs, logits, param;
1814
int num_events;
1915

20-
std::vector<int64_t> extended_shape(c10::ArrayRef<int64_t> sample_shape);
21-
2216
public:
2317
Categorical(const torch::Tensor *probs, const torch::Tensor *logits);
2418

include/cpprl/distributions/distribution.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ namespace cpprl
66
{
77
class Distribution
88
{
9+
protected:
10+
std::vector<int64_t> batch_shape, event_shape;
11+
12+
std::vector<int64_t> extended_shape(c10::ArrayRef<int64_t> sample_shape);
13+
914
public:
1015
virtual ~Distribution() = 0;
1116

include/cpprl/distributions/normal.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@ class Normal : public Distribution
1111
{
1212
private:
1313
torch::Tensor loc, scale;
14-
std::vector<int64_t> batch_shape, event_shape;
15-
16-
std::vector<int64_t> extended_shape(c10::ArrayRef<int64_t> sample_shape);
1714

1815
public:
1916
Normal(const torch::Tensor loc, const torch::Tensor scale);

src/distributions/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
# Register the distribution sources with the main library target.
target_sources(cpprl
    PRIVATE
        ${CMAKE_CURRENT_LIST_DIR}/bernoulli.cpp
        ${CMAKE_CURRENT_LIST_DIR}/categorical.cpp
        ${CMAKE_CURRENT_LIST_DIR}/distribution.cpp
        ${CMAKE_CURRENT_LIST_DIR}/normal.cpp
)

# The test binary compiles the same sources: the doctest cases live inline
# in the .cpp files above.
if (CPPRL_BUILD_TESTS)
    target_sources(cpprl_tests
        PRIVATE
            ${CMAKE_CURRENT_LIST_DIR}/bernoulli.cpp
            ${CMAKE_CURRENT_LIST_DIR}/categorical.cpp
            ${CMAKE_CURRENT_LIST_DIR}/distribution.cpp
            ${CMAKE_CURRENT_LIST_DIR}/normal.cpp
    )
endif (CPPRL_BUILD_TESTS)

src/distributions/bernoulli.cpp

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
#include <stdexcept>

#include <ATen/core/Reduction.h>
#include <c10/util/ArrayRef.h>
#include <spdlog/spdlog.h>
#include <torch/torch.h>

#include "cpprl/distributions/bernoulli.h"
#include "third_party/doctest.h"

namespace cpprl
{
// Builds the distribution from exactly one of probs/logits. The other
// parameterization is derived so both accessors are always valid.
// Throws std::invalid_argument (derives from std::exception, so existing
// catch sites are unaffected) on misuse instead of a messageless
// std::exception.
Bernoulli::Bernoulli(const torch::Tensor *probs,
                     const torch::Tensor *logits)
{
    if ((probs == nullptr) == (logits == nullptr))
    {
        spdlog::error("Either probs or logits is required, but not both");
        throw std::invalid_argument("Either probs or logits is required, but not both");
    }

    if (probs != nullptr)
    {
        if (probs->dim() < 1)
        {
            spdlog::error("Probs tensor must have at least one dimension");
            throw std::invalid_argument("Probs tensor must have at least one dimension");
        }
        this->probs = *probs;
        // 1.21e-7 is used as the epsilon to match PyTorch's Python results as
        // closely as possible
        auto clamped_probs = this->probs.clamp(1.21e-7, 1. - 1.21e-7);
        // logit(p) = log(p) - log(1 - p); log1p keeps precision near p == 0.
        this->logits = torch::log(clamped_probs) - torch::log1p(-clamped_probs);
    }
    else
    {
        if (logits->dim() < 1)
        {
            spdlog::error("Logits tensor must have at least one dimension");
            throw std::invalid_argument("Logits tensor must have at least one dimension");
        }
        this->logits = *logits;
        this->probs = torch::sigmoid(*logits);
    }

    // param aliases whichever tensor the caller supplied; its sizes define
    // the batch shape used by Distribution::extended_shape.
    param = probs != nullptr ? *probs : *logits;
    batch_shape = param.sizes().vec();
}

torch::Tensor Bernoulli::entropy()
{
    // BCE-with-logits of probs against their own logits is exactly the
    // per-element Bernoulli entropy, returned unreduced (same shape as probs).
    return torch::binary_cross_entropy_with_logits(
        logits, probs, torch::Tensor(), torch::Tensor(), Reduction::None);
}

torch::Tensor Bernoulli::log_prob(torch::Tensor value)
{
    // Broadcast value against logits so callers may pass a smaller or
    // sample-shaped tensor; negate BCE to get the log-probability.
    auto broadcasted_tensors = torch::broadcast_tensors({logits, value});
    return -torch::binary_cross_entropy_with_logits(
        broadcasted_tensors[0], broadcasted_tensors[1],
        torch::Tensor(), torch::Tensor(), Reduction::None);
}

torch::Tensor Bernoulli::sample(c10::ArrayRef<int64_t> sample_shape)
{
    // Sampling must not be recorded on the autograd tape.
    auto ext_sample_shape = extended_shape(sample_shape);
    torch::NoGradGuard no_grad_guard;
    return torch::bernoulli(probs.expand(ext_sample_shape));
}

TEST_CASE("Bernoulli")
{
    SUBCASE("Throws when provided both probs and logits")
    {
        auto tensor = torch::Tensor();
        CHECK_THROWS(Bernoulli(&tensor, &tensor));
    }

    SUBCASE("Sampled numbers are in the right range")
    {
        float probabilities[] = {0.2, 0.2, 0.2, 0.2, 0.2};
        auto probabilities_tensor = torch::from_blob(probabilities, {5});
        auto dist = Bernoulli(&probabilities_tensor, nullptr);

        auto output = dist.sample({100});
        auto more_than_1 = output > 1;
        auto less_than_0 = output < 0;
        CHECK(!more_than_1.any().item().toInt());
        CHECK(!less_than_0.any().item().toInt());
    }

    SUBCASE("Sampled tensors are of the right shape")
    {
        float probabilities[] = {0.2, 0.2, 0.2, 0.2, 0.2};
        auto probabilities_tensor = torch::from_blob(probabilities, {5});
        auto dist = Bernoulli(&probabilities_tensor, nullptr);

        CHECK(dist.sample({20}).sizes().vec() == std::vector<int64_t>{20, 5});
        CHECK(dist.sample({2, 20}).sizes().vec() == std::vector<int64_t>{2, 20, 5});
        CHECK(dist.sample({1, 2, 3, 4}).sizes().vec() == std::vector<int64_t>{1, 2, 3, 4, 5});
    }

    SUBCASE("Multi-dimensional input probabilities are handled correctly")
    {
        SUBCASE("Sampled tensors are of the right shape")
        {
            float probabilities[2][4] = {{0.5, 0.5, 0.0, 0.0},
                                         {0.25, 0.25, 0.25, 0.25}};
            auto probabilities_tensor = torch::from_blob(probabilities, {2, 4});
            auto dist = Bernoulli(&probabilities_tensor, nullptr);

            CHECK(dist.sample({20}).sizes().vec() == std::vector<int64_t>{20, 2, 4});
            CHECK(dist.sample({10, 5}).sizes().vec() == std::vector<int64_t>{10, 5, 2, 4});
        }
    }

    SUBCASE("entropy()")
    {
        float probabilities[2][2] = {{0.5, 0.0},
                                     {0.25, 0.25}};
        auto probabilities_tensor = torch::from_blob(probabilities, {2, 2});
        auto dist = Bernoulli(&probabilities_tensor, nullptr);

        auto entropies = dist.entropy();

        SUBCASE("Returns correct values")
        {
            // Reference values from torch.distributions.Bernoulli.entropy().
            CHECK(entropies[0][0].item().toDouble() ==
                  doctest::Approx(0.6931).epsilon(1e-3));
            CHECK(entropies[0][1].item().toDouble() ==
                  doctest::Approx(0.0000).epsilon(1e-3));
            CHECK(entropies[1][0].item().toDouble() ==
                  doctest::Approx(0.5623).epsilon(1e-3));
            CHECK(entropies[1][1].item().toDouble() ==
                  doctest::Approx(0.5623).epsilon(1e-3));
        }

        SUBCASE("Output tensor is the correct size")
        {
            CHECK(entropies.sizes().vec() == std::vector<int64_t>{2, 2});
        }
    }

    SUBCASE("log_prob()")
    {
        float probabilities[2][2] = {{0.5, 0.0},
                                     {0.25, 0.25}};
        auto probabilities_tensor = torch::from_blob(probabilities, {2, 2});
        auto dist = Bernoulli(&probabilities_tensor, nullptr);

        float actions[2][2] = {{1, 0},
                               {1, 0}};
        auto actions_tensor = torch::from_blob(actions, {2, 2});
        auto log_probs = dist.log_prob(actions_tensor);

        INFO(log_probs << "\n");
        SUBCASE("Returns correct values")
        {
            // Reference values from torch.distributions.Bernoulli.log_prob().
            CHECK(log_probs[0][0].item().toDouble() ==
                  doctest::Approx(-0.6931).epsilon(1e-3));
            CHECK(log_probs[0][1].item().toDouble() ==
                  doctest::Approx(0.0000).epsilon(1e-3));
            CHECK(log_probs[1][0].item().toDouble() ==
                  doctest::Approx(-1.3863).epsilon(1e-3));
            CHECK(log_probs[1][1].item().toDouble() ==
                  doctest::Approx(-0.2876).epsilon(1e-3));
        }

        SUBCASE("Output tensor is correct size")
        {
            CHECK(log_probs.sizes().vec() == std::vector<int64_t>{2, 2});
        }
    }
}
}

src/distributions/categorical.cpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <c10/util/ArrayRef.h>
2+
#include <spdlog/spdlog.h>
23
#include <torch/torch.h>
34

45
#include "cpprl/distributions/categorical.h"
@@ -11,6 +12,7 @@ Categorical::Categorical(const torch::Tensor *probs,
1112
{
1213
if ((probs == nullptr) == (logits == nullptr))
1314
{
15+
spdlog::error("Either probs or logits is required, but not both");
1416
throw std::exception();
1517
}
1618

@@ -51,21 +53,6 @@ torch::Tensor Categorical::entropy()
5153
return -p_log_p.sum(-1);
5254
}
5355

54-
std::vector<int64_t> Categorical::extended_shape(c10::ArrayRef<int64_t> sample_shape)
55-
{
56-
std::vector<int64_t> output_shape;
57-
output_shape.insert(output_shape.end(),
58-
sample_shape.begin(),
59-
sample_shape.end());
60-
output_shape.insert(output_shape.end(),
61-
batch_shape.begin(),
62-
batch_shape.end());
63-
output_shape.insert(output_shape.end(),
64-
event_shape.begin(),
65-
event_shape.end());
66-
return output_shape;
67-
}
68-
6956
torch::Tensor Categorical::log_prob(torch::Tensor value)
7057
{
7158
value = value.to(torch::kLong).unsqueeze(-1);

src/distributions/distribution.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
#include <cstdint>
#include <vector>
#include <ctype.h>

#include "cpprl/distributions/distribution.h"

namespace cpprl
{
// Returns the concatenation sample_shape + batch_shape + event_shape —
// the full size of a tensor produced by sample(sample_shape). Shared by
// every concrete distribution (Bernoulli, Categorical, Normal).
std::vector<int64_t> Distribution::extended_shape(c10::ArrayRef<int64_t> sample_shape)
{
    std::vector<int64_t> output_shape;
    // Reserve once so the three inserts below never reallocate.
    output_shape.reserve(sample_shape.size() +
                         batch_shape.size() +
                         event_shape.size());
    output_shape.insert(output_shape.end(),
                        sample_shape.begin(),
                        sample_shape.end());
    output_shape.insert(output_shape.end(),
                        batch_shape.begin(),
                        batch_shape.end());
    output_shape.insert(output_shape.end(),
                        event_shape.begin(),
                        event_shape.end());
    return output_shape;
}
}

src/distributions/normal.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,6 @@ torch::Tensor Normal::entropy()
2626
return (0.5 + 0.5 * std::log(2 * M_PI) + torch::log(scale)).sum(-1);
2727
}
2828

29-
std::vector<int64_t> Normal::extended_shape(c10::ArrayRef<int64_t> sample_shape)
30-
{
31-
std::vector<int64_t> output_shape;
32-
output_shape.insert(output_shape.end(),
33-
sample_shape.begin(),
34-
sample_shape.end());
35-
output_shape.insert(output_shape.end(),
36-
batch_shape.begin(),
37-
batch_shape.end());
38-
output_shape.insert(output_shape.end(),
39-
event_shape.begin(),
40-
event_shape.end());
41-
return output_shape;
42-
}
43-
4429
torch::Tensor Normal::log_prob(torch::Tensor value)
4530
{
4631
auto variance = scale.pow(2);

0 commit comments

Comments
 (0)