Svalorzen
diff --git a/‎README.md
+155-147 b/‎README.md
+155-147
diff --git a/‎include/AIToolbox/Bandit/Model.hpp
+116 b/‎include/AIToolbox/Bandit/Model.hpp
+116
diff --git a/‎include/AIToolbox/Factored/Bandit/FlattenedModel.hpp
+96 b/‎include/AIToolbox/Factored/Bandit/FlattenedModel.hpp
+96
diff --git a/‎include/AIToolbox/Factored/Bandit/Model.hpp
+121 b/‎include/AIToolbox/Factored/Bandit/Model.hpp
+121
diff --git a/‎include/AIToolbox/POMDP/Algorithms/LinearSupport.hpp
+1-1 b/‎include/AIToolbox/POMDP/Algorithms/LinearSupport.hpp
+1-1
@@ -0,0 +1,116 @@
+#ifndef AI_TOOLBOX_BANDIT_MODEL_HEADER_FILE
+#define AI_TOOLBOX_BANDIT_MODEL_HEADER_FILE
+
+#include <AIToolbox/Types.hpp>
+#include <AIToolbox/Impl/Seeder.hpp>
+
+namespace AIToolbox::Bandit {
+    /**
+     * @brief This class represents a multi-armed bandit.
+     *
+     * This class contains a set of distributions, each of which corresponds to
+     * a specific bandit arm. The arms are all assumed to be of the same
+     * family; we could work with different distributions but it would
+     * complicate the code for something that is not really commonly used.
+     *
+     * The class is fairly easy to use, as one can only pull a given arm and
+     * obtain a sampled reward in return.
+     *
+     * The distribution is assumed to be one of the standard C++ distributions.
+     * Custom ones may be used, as long as they can be sampled by passing a
+     * RandomEngine to their operator().
+     *
+     * @tparam Dist The distribution family to use for all arms.
+     */
+    template <typename Dist>
+    class Model {
+        public:
+            /**
+             * @brief Basic constructor.
+             *
+             * We take as input a variable number of tuples (possibly
+             * containing different types). Each tuple is used to initialize a
+             * single arm.
+             *
+             * The number of arms will be equal to the number of tuples passed
+             * as arguments.
+             *
+             * @param tupleArgs A set of tuples, each containing the parameters to initialize an arm.
+             */
+            template <typename... TupleArgs>
+            Model(TupleArgs... tupleArgs);
+
+            /**
+             * @brief Basic constructor.
+             *
+             * This constructor initializes each arm from one of the tuples
+             * contained by the parameter.
+             *
+             * The number of arms will be equal to the size of the input
+             * vector.
+             *
+             * @param args The arguments with which to initialize the bandit arms.
+             */
+            template <typename... Args>
+            Model(std::vector<std::tuple<Args...>> args);
+
+            /**
+             * @brief This function samples the specified bandit arm.
+             *
+             * The arm index is not bounds-checked: it must be lower than
+             * the value returned by getA().
+             *
+             * @param a The arm to sample.
+             *
+             * @return A reward sampled from the arm's underlying distribution.
+             */
+            double sampleR(size_t a) const;
+
+            /**
+             * @brief This function returns the number of arms of the bandit.
+             *
+             * @return The number of arms of the bandit.
+             */
+            size_t getA() const;
+
+            /**
+             * @brief This function returns a reference to the underlying arms.
+             *
+             * @return A vector containing the arms of the bandit.
+             */
+            const std::vector<Dist> & getArms() const;
+
+        private:
+            mutable std::vector<Dist> arms_; // mutable: the const sampleR() invokes the arms' call operator, which is non-const for the standard distributions.
+            mutable AIToolbox::RandomEngine rand_; // mutable: the const sampleR() passes this engine to the sampled arm.
+    };
+
+    // Variadic constructor: builds one arm per input tuple, in argument order.
+    //
+    // The arms are appended one at a time rather than being passed through a
+    // braced initializer list: the elements of a std::initializer_list are
+    // const, so they would have to be copied (not moved) into the vector.
+    template <typename Dist>
+    template <typename... TupleArgs>
+    Model<Dist>::Model(TupleArgs... tupleArgs) :
+        rand_(AIToolbox::Impl::Seeder::getSeed())
+    {
+        arms_.reserve(sizeof...(TupleArgs));
+
+        // Fold over the comma operator: evaluated left to right, so the arms
+        // keep the order of the arguments. push_back returns void, so the
+        // built-in comma operator is always the one being used.
+        (arms_.push_back(std::make_from_tuple<Dist>(std::move(tupleArgs))), ...);
+    }
+
+    // Vector constructor: builds one arm from each tuple of the input vector,
+    // preserving the order of the vector.
+    template <typename Dist>
+    template <typename... Args>
+    Model<Dist>::Model(std::vector<std::tuple<Args...>> args) :
+        rand_(AIToolbox::Impl::Seeder::getSeed())
+    {
+        arms_.reserve(args.size());
+
+        // A generic lambda spares us from having to static_cast to the right
+        // emplace_back overload in order to hand it over to std::apply.
+        const auto buildArm = [this](auto&&... params) {
+            arms_.emplace_back(std::move(params)...);
+        };
+
+        // The vector is our own copy, so each tuple can be moved from.
+        for (auto & armArgs : args)
+            std::apply(buildArm, std::move(armArgs));
+    }
+
+    // Pulls arm `a` by feeding the internal random engine to its distribution.
+    // The index is used as-is, with no bounds check.
+    template <typename Dist>
+    double Model<Dist>::sampleR(const size_t a) const {
+        auto & pulledArm = arms_[a];
+        return pulledArm(rand_);
+    }
+
+    // The number of arms is simply the number of stored distributions.
+    template <typename Dist>
+    size_t Model<Dist>::getA() const {
+        return arms_.size();
+    }
+
+    // Read-only access to the stored distributions, one per arm.
+    template <typename Dist>
+    const std::vector<Dist> & Model<Dist>::getArms() const {
+        return arms_;
+    }
+}
+
+#endif
@@ -0,0 +1,96 @@
+#ifndef AI_TOOLBOX_FACTORED_BANDIT_FLATTENED_MODEL_HEADER_FILE
+#define AI_TOOLBOX_FACTORED_BANDIT_FLATTENED_MODEL_HEADER_FILE
+
+#include <AIToolbox/Factored/Bandit/Model.hpp>
+
+namespace AIToolbox::Factored::Bandit {
+    /**
+     * @brief This class flattens a factored bandit model.
+     *
+     * This class allows flattening a factored bandit model back into its
+     * equivalent single-agent multi-armed bandit. This class is simply a
+     * wrapper, and does not copy the original model: it only keeps a
+     * reference to it, so the wrapped model must outlive this object. All
+     * conversions between joint-actions and flattened actions are done
+     * on-the-fly as needed.
+     *
+     * Note that flattening the problem makes it harder, as the new bandit has
+     * an effective action space equal to the full product of all the agents'
+     * actions in the original problem, and does not get access to the
+     * structure of the factorization.
+     *
+     * @tparam Dist The distribution to use for all arms.
+     */
+    template <typename Dist>
+    class FlattenedModel {
+        public:
+            /**
+             * @brief Basic constructor.
+             *
+             * The input model is held by reference and is not copied.
+             *
+             * @param model The factored multi-armed bandit to wrap.
+             */
+            FlattenedModel(const Model<Dist> & model);
+
+            /**
+             * @brief This function samples the specified bandit arm.
+             *
+             * This function converts the input action into its equivalent
+             * joint-action for the wrapped model. It then returns the sum of
+             * the obtained reward vector.
+             *
+             * @param a The arm to sample.
+             *
+             * @return The sampled reward for the selected arm.
+             */
+            double sampleR(size_t a) const;
+
+            /**
+             * @brief This function converts the input action to its equivalent joint-action.
+             *
+             * @param a The input action.
+             *
+             * @return The equivalent joint-action for the wrapped bandit.
+             */
+            Action convertA(size_t a) const;
+
+            /**
+             * @brief This function returns the number of arms of the bandit.
+             *
+             * This value is pre-computed, not computed on the fly, to keep
+             * this function fast.
+             *
+             * @return The number of arms of the bandit.
+             */
+            size_t getA() const;
+
+            /**
+             * @brief This function returns a reference to the wrapped factored bandit.
+             *
+             * @return The wrapped factored bandit.
+             */
+            const Model<Dist> & getModel() const;
+
+        private:
+            const Model<Dist> & model_; // Non-owning reference to the wrapped model.
+
+            size_t A; // Number of flattened arms, computed once in the constructor.
+            mutable Action helper_; // Scratch joint-action, overwritten by the const sampleR().
+    };
+
+    // Keeps a reference to the input model, pre-computes the number of
+    // flattened arms from its joint action space, and sizes the scratch
+    // joint-action after the number of entries of that space.
+    template <typename Dist>
+    FlattenedModel<Dist>::FlattenedModel(const Model<Dist> & model) :
+            model_(model),
+            A(factorSpace(model.getA())),
+            helper_(model.getA().size())
+    {
+    }
+
+    // Samples flattened arm `a`: the index is first expanded into the
+    // equivalent joint-action (written into the scratch member to avoid
+    // building a new one on every call), then the rewards sampled from the
+    // wrapped model are summed into a single value.
+    template <typename Dist>
+    double FlattenedModel<Dist>::sampleR(size_t a) const {
+        toFactors(model_.getA(), a, &helper_);
+        return model_.sampleR(helper_).sum();
+    }
+
+    // Converts flattened arm `a` into its joint-action.
+    //
+    // This member is declared in the class but had no definition, so any use
+    // of it would fail to link. It performs the same conversion as sampleR()
+    // and returns a copy of the scratch joint-action.
+    template <typename Dist>
+    Action FlattenedModel<Dist>::convertA(size_t a) const {
+        toFactors(model_.getA(), a, &helper_);
+        return helper_;
+    }
+
+    // Returns the arm count cached at construction time.
+    template <typename Dist>
+    size_t FlattenedModel<Dist>::getA() const {
+        return A;
+    }
+    // Gives read-only access to the wrapped factored bandit.
+    template <typename Dist>
+    const Model<Dist> & FlattenedModel<Dist>::getModel() const {
+        return model_;
+    }
+}
+
+#endif
@@ -0,0 +1,121 @@
+#ifndef AI_TOOLBOX_FACTORED_BANDIT_MODEL_HEADER_FILE
+#define AI_TOOLBOX_FACTORED_BANDIT_MODEL_HEADER_FILE
+
+#include <AIToolbox/Bandit/Model.hpp>
+#include <AIToolbox/Factored/Bandit/Types.hpp>
+#include <AIToolbox/Factored/Utils/Core.hpp>
+
+namespace AIToolbox::Factored::Bandit {
+    /**
+     * @brief This class represents a factored multi-armed bandit.
+     *
+     * A factored multi-armed bandit is a specific bandit class, where the
+     * reward function is factored into independent components, each of which
+     * only depends on a subset of agents. The goal is generally to maximize
+     * the sum of the rewards of all local arms.
+     *
+     * It effectively behaves as a collection of multi-armed bandits, aside
+     * from the fact that the action each agent takes will be the same in all
+     * bandits that it participates into. Each "local" bandit's effective
+     * action will be the combination of all participating agents.
+     *
+     * This structure can make learning how to act much more efficient, as
+     * exploiting the factorization allows to extract more information from
+     * each joint action performed by the agents.
+     *
+     * @tparam Dist The distribution to use for all local arms.
+     */
+    template <typename Dist>
+    class Model {
+        public:
+            /**
+             * @brief Basic constructor.
+             *
+             * This constructor creates the factored multi-armed bandit from a
+             * set of standard bandits, each associated with a group of agents.
+             *
+             * Note that the action space of each bandit must be equal to the
+             * product of the action spaces of all agents in its group. For
+             * example, a bandit associated with agents with action spaces 2,
+             * 3, 2 should have 12 arms in total.
+             *
+             * These requirements, together with the number of groups being
+             * equal to the number of bandits, are only verified via assert.
+             *
+             * @param A The joint action space.
+             * @param deps The agents associated with each bandit.
+             * @param arms The local bandits to use.
+             */
+            Model(Action A, std::vector<PartialKeys> deps, std::vector<AIToolbox::Bandit::Model<Dist>> arms);
+
+            /**
+             * @brief This function samples the specified joint bandit arm.
+             *
+             * @param a The joint arm to sample.
+             *
+             * @return A vector containing the rewards of each local arm.
+             */
+            Rewards sampleR(const Action & a) const;
+
+            /**
+             * @brief This function returns the joint action space.
+             *
+             * @return The joint action space.
+             */
+            const Action & getA() const;
+
+            /**
+             * @brief This function returns a reference to the agent groupings.
+             *
+             * @return The agents associated with each local bandit.
+             */
+            const std::vector<PartialKeys> & getGroups() const;
+
+            /**
+             * @brief This function returns a reference to the internal local arms.
+             *
+             * @return The local bandits.
+             */
+            const std::vector<AIToolbox::Bandit::Model<Dist>> & getArms() const;
+
+        private:
+            Action A;
+            std::vector<PartialKeys> groups_;
+
+            // Not mutable: the local bandits are only ever accessed through
+            // their const interface.
+            std::vector<AIToolbox::Bandit::Model<Dist>> arms_;
+    };
+
+    template <typename Dist>
+    Model<Dist>::Model(Action a, std::vector<PartialKeys> deps, std::vector<AIToolbox::Bandit::Model<Dist>> arms) :
+            A(std::move(a)), groups_(std::move(deps)), arms_(std::move(arms))
+    {
+        // Sanity checks
+        //
+        // - The number of groups is equal to the number of local arms.
+        // - Each local arm has an action space equal to the product
+        //   of its participating agents.
+        assert(groups_.size() == arms_.size());
+
+        for (size_t i = 0; i < groups_.size(); ++i) {
+            // Only read by the assert below; the attribute avoids an
+            // unused-variable warning when asserts are compiled out.
+            [[maybe_unused]] const auto bSize = factorSpacePartial(groups_[i], A);
+
+            assert(bSize == arms_[i].getA());
+        }
+    }
+
+    // Samples every local bandit once: each group maps the joint action to
+    // the index of its own local arm, and the resulting rewards are stored in
+    // group order.
+    template <typename Dist>
+    Rewards Model<Dist>::sampleR(const Action & a) const {
+        const size_t groupsNum = groups_.size();
+        Rewards rews(groupsNum);
+
+        for (size_t g = 0; g < groupsNum; ++g)
+            rews[g] = arms_[g].sampleR(toIndexPartial(groups_[g], A, a));
+
+        return rews;
+    }
+
+    // Read-only access to the joint action space.
+    template <typename Dist>
+    const Action & Model<Dist>::getA() const {
+        return A;
+    }
+    // Read-only access to the agent groups, one per local bandit.
+    template <typename Dist>
+    const std::vector<PartialKeys> & Model<Dist>::getGroups() const {
+        return groups_;
+    }
+    // Read-only access to the local bandits.
+    template <typename Dist>
+    const std::vector<AIToolbox::Bandit::Model<Dist>> & Model<Dist>::getArms() const {
+        return arms_;
+    }
+}
+
+#endif
@@ -63,7 +63,7 @@ namespace AIToolbox::POMDP {
              * as the difference between two iterations is less than the
              * tolerance specified.
              *
-             * @param h The horizon chosen.
+             * @param horizon The horizon chosen.
              * @param tolerance The tolerance factor to stop the value iteration loop.
              */
             LinearSupport(unsigned horizon, double tolerance);
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ namespace AIToolbox::POMDP {`
`63`	`63`	`* as the difference between two iterations is less than the`
`64`	`64`	`* tolerance specified.`
`65`	`65`	`*`
`66`		`- * @param h The horizon chosen.`
	`66`	`+ * @param horizon The horizon chosen.`
`67`	`67`	`* @param tolerance The tolerance factor to stop the value iteration loop.`
`68`	`68`	`*/`
`69`	`69`	`LinearSupport(unsigned horizon, double tolerance);`