Skip to content

Commit 7046d76

Browse files
committed
Utils: Added simple Adam gradient descent implementation.
This is a simple implementation of Adam. It tries to be as straightforward as possible, and extremely cheap to call and update (although not to construct, given the internal Vector allocations required for the moment estimates). It currently doesn't have much use in the library, but it should become more useful once Gaussian Processes (GPs) are added. In the meantime it cannot hurt to have it around in Utils.
1 parent 10ec7da commit 7046d76

File tree

5 files changed

+214
-0
lines changed

5 files changed

+214
-0
lines changed

include/AIToolbox/Utils/Adam.hpp

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#ifndef AI_TOOLBOX_ADAM_HEADER_FILE
#define AI_TOOLBOX_ADAM_HEADER_FILE

#include <AIToolbox/Types.hpp>

namespace AIToolbox {
    /**
     * @brief This class implements the ADAM gradient descent algorithm.
     *
     * The implementation is deliberately minimal and cheap to call. It
     * operates through two pointers to equally-sized vectors: one holds the
     * point currently being optimized, the other holds the gradient of the
     * objective at that point.
     *
     * The user is responsible for recomputing the gradient between calls;
     * each step() then moves the point along the gradient following the
     * Adam update rule.
     *
     * Pointers (rather than references) are used so that they can be
     * re-targeted later via reset(), keeping the same instance around. The
     * replacement vectors must have the same size as the originals, which
     * lets us avoid reallocating the internal moment-estimate vectors.
     */
    class Adam {
        public:
            /**
             * @brief Basic constructor.
             *
             * Both pointers must be non-null and point to preallocated
             * vectors of equal size.
             *
             * The point vector must already contain the starting point of
             * the descent, and the gradient vector the gradient evaluated
             * there.
             *
             * @param point A pointer to preallocated space where to write the point.
             * @param gradient A pointer to preallocated space containing the current gradient.
             * @param alpha Adam's step size/learning rate.
             * @param beta1 Adam's exponential decay rate for first moment estimates.
             * @param beta2 Adam's exponential decay rate for second moment estimates.
             * @param epsilon Additive parameter to prevent division by zero.
             */
            Adam(AIToolbox::Vector * point, const AIToolbox::Vector * gradient, double alpha = 0.001, double beta1 = 0.9, double beta2 = 0.999, double epsilon = 1e-8);

            /**
             * @brief This function updates the point using the currently set gradient.
             *
             * The vector behind the `point` pointer is overwritten in place,
             * following the currently set gradient.
             *
             * The gradient must have been recomputed for the current point
             * by the user before calling this function.
             */
            void step();

            /**
             * @brief This function resets the gradient descent process.
             *
             * All internal state (moment estimates, step counter) is
             * cleared so that the descent can restart from scratch.
             *
             * The point vector is not modified.
             */
            void reset();

            /**
             * @brief This function resets the gradient descent process.
             *
             * All internal state (moment estimates, step counter) is
             * cleared so that the descent can restart from scratch.
             *
             * The point and gradient pointers are updated with the new inputs.
             */
            void reset(AIToolbox::Vector * point, const AIToolbox::Vector * gradient);

            /**
             * @brief This function sets the current learning rate.
             */
            void setAlpha(double alpha);

            /**
             * @brief This function sets the current exponential decay rate for first moment estimates.
             */
            void setBeta1(double beta1);

            /**
             * @brief This function sets the current exponential decay rate for second moment estimates.
             */
            void setBeta2(double beta2);

            /**
             * @brief This function sets the current additive division parameter.
             */
            void setEpsilon(double epsilon);

            /**
             * @brief This function returns the current learning rate.
             */
            double getAlpha() const;

            /**
             * @brief This function returns the current exponential decay rate for first moment estimates.
             */
            double getBeta1() const;

            /**
             * @brief This function returns the current exponential decay rate for second moment estimates.
             */
            double getBeta2() const;

            /**
             * @brief This function returns the current additive division parameter.
             */
            double getEpsilon() const;

        private:
            // Non-owning; the user keeps the pointees alive and sized equally.
            AIToolbox::Vector * point_;
            const AIToolbox::Vector * gradient_;
            // First (m_) and second (v_) moment estimates, sized like *point_.
            AIToolbox::Vector m_, v_;

            double beta1_, beta2_, alpha_, epsilon_;
            // 1-based step counter used for the bias-correction terms.
            unsigned step_;
    };
}

#endif

src/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ if (MAKE_MDP)
44
add_library(AIToolboxMDP
55
Impl/Seeder.cpp
66
Impl/CassandraParser.cpp
7+
Utils/Adam.cpp
78
Utils/Combinatorics.cpp
89
Utils/Probability.cpp
910
Utils/Polytope.cpp

src/Utils/Adam.cpp

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#include <AIToolbox/Utils/Adam.hpp>

// Fixed: these were previously pulled in only transitively; step() uses
// assert, std::sqrt and std::pow directly.
#include <cassert>
#include <cmath>

namespace AIToolbox {
    Adam::Adam(AIToolbox::Vector * point, const AIToolbox::Vector * gradient, const double alpha, const double beta1, const double beta2, const double epsilon) :
            point_(point), gradient_(gradient),
            m_(point_->size()), v_(point_->size()),
            beta1_(beta1), beta2_(beta2), alpha_(alpha), epsilon_(epsilon),
            step_(1)
    {
        // Documented precondition: both pointers must be valid and the
        // vectors equally sized (the moment estimates mirror their size).
        assert(point_);
        assert(gradient_);
        assert(point_->size() == gradient_->size());
        reset();
    }

    void Adam::step() {
        // Fixed: the asserts referenced undeclared names `point`/`gradient`
        // (the members are point_/gradient_), which broke debug builds.
        assert(point_);
        assert(gradient_);

        // Update biased first and second raw moment estimates.
        m_ = beta1_ * m_ + (1.0 - beta1_) * (*gradient_);
        v_ = beta2_ * v_ + (1.0 - beta2_) * (*gradient_).array().square().matrix();

        // Fold both bias corrections into the step size; this is the standard
        // efficient reformulation of Adam (avoids creating mHat/vHat vectors).
        const double alphaHat = alpha_ * std::sqrt(1.0 - std::pow(beta2_, step_)) / (1.0 - std::pow(beta1_, step_));

        (*point_).array() -= alphaHat * m_.array() / (v_.array().sqrt() + epsilon_);

        ++step_;
    }

    void Adam::reset() {
        // Clear moment estimates and restart the bias-correction schedule.
        m_.fill(0.0);
        v_.fill(0.0);
        step_ = 1;
    }

    void Adam::reset(AIToolbox::Vector * point, const AIToolbox::Vector * gradient) {
        // Re-target the descent onto new (equally sized) vectors.
        assert(point);
        assert(gradient);
        point_ = point;
        gradient_ = gradient;
        reset();
    }

    void Adam::setBeta1(double beta1) { beta1_ = beta1; }
    void Adam::setBeta2(double beta2) { beta2_ = beta2; }
    void Adam::setAlpha(double alpha) { alpha_ = alpha; }
    void Adam::setEpsilon(double epsilon) { epsilon_ = epsilon; }

    double Adam::getBeta1() const { return beta1_; }
    double Adam::getBeta2() const { return beta2_; }
    double Adam::getAlpha() const { return alpha_; }
    double Adam::getEpsilon() const { return epsilon_; }
}

test/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.9) # CMP0069 NEW
33
set(GlobalFileDependencies
44
${PROJECT_SOURCE_DIR}/src/Impl/Seeder.cpp
55
${PROJECT_SOURCE_DIR}/src/Tools/Statistics.cpp
6+
${PROJECT_SOURCE_DIR}/src/Utils/Adam.cpp
67
${PROJECT_SOURCE_DIR}/src/Utils/Combinatorics.cpp
78
${PROJECT_SOURCE_DIR}/src/Utils/Probability.cpp
89
${PROJECT_SOURCE_DIR}/src/Utils/LP/LpSolveWrapper.cpp
@@ -39,6 +40,7 @@ function (AddTestPython type name)
3940
endfunction (AddTestPython)
4041

4142
if (MAKE_MDP)
43+
AddTestGlobal(UtilsAdam)
4244
AddTestGlobal(UtilsCore)
4345
AddTestGlobal(UtilsProbability)
4446
AddTestGlobal(UtilsPrune)

test/UtilsAdamTests.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#define BOOST_TEST_MODULE UtilsAdam
#define BOOST_TEST_DYN_LINK
#define BOOST_TEST_MAIN
#include <boost/test/unit_test.hpp>

#include <AIToolbox/Types.hpp>
#include <AIToolbox/Utils/Adam.hpp>

namespace ai = AIToolbox;

// Convex objective f(p) = ||p||^2, minimized at the origin.
double objective(const ai::Vector & p) {
    return p.squaredNorm();
}

// Analytic gradient of the objective: df/dp_i = 2 * p_i.
// Generalized to any vector size (behavior unchanged for the 2D test below).
void derivative(const ai::Vector & p, ai::Vector & grad) {
    grad = 2.0 * p;
}

BOOST_AUTO_TEST_CASE( simple_gradient_descent ) {
    // Removed redundant `using namespace AIToolbox;` — the `ai` alias is used throughout.
    ai::Vector point(2);
    point << -0.21, 0.47;

    ai::Vector gradient(2);
    derivative(point, gradient);

    ai::Adam adam(&point, &gradient, 0.02);

    // Each iteration: take an Adam step, then recompute the gradient at the
    // new point, as the Adam class requires.
    for (auto i = 0; i < 100; ++i) {
        adam.step();
        derivative(point, gradient);
    }

    // 100 steps with alpha = 0.02 should land very close to the minimum.
    const double val = objective(point);
    BOOST_TEST_INFO(val);
    BOOST_CHECK(val < 1e-5);
}

0 commit comments

Comments
 (0)