Skip to content

Commit 10ec7da

Browse files
committed
Merge branch 'BanditModel' into test
2 parents 4411b8b + ca01b64 commit 10ec7da

24 files changed

+653
-170
lines changed

README.md

+155-147
Large diffs are not rendered by default.

include/AIToolbox/Bandit/Model.hpp

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#ifndef AI_TOOLBOX_BANDIT_MODEL_HEADER_FILE
2+
#define AI_TOOLBOX_BANDIT_MODEL_HEADER_FILE
3+
4+
#include <AIToolbox/Types.hpp>
5+
#include <AIToolbox/Impl/Seeder.hpp>
6+
7+
namespace AIToolbox::Bandit {
8+
/**
9+
* @brief This class represent a multi-armed bandit.
10+
*
11+
* This class contains a set of distributions, each of which corresponds to
12+
* a specific bandit arm. The arms are all assumed to be of the same
13+
* family; we could work with different distributions but it would
14+
* complicate the code for something that is not really commonly used.
15+
*
16+
* The class is fairly easy to use, as one can only pull a given arm and
17+
* obtain a sampled reward in return.
18+
*
19+
* The distribution is assumed to be one of the standard C++ distributions.
20+
* Custom ones may be used, as long as they can be sampled by passing a
21+
* RandomEngine to their operator().
22+
*
23+
* @tparam Dist The distribution family to use for all arms.
24+
*/
25+
template <typename Dist>
26+
class Model {
27+
public:
28+
/**
29+
* @brief Basic constructor.
30+
*
31+
* We take as input a variable number of tuples (possibly
32+
* containing different types). Each tuple is used to initialize a
33+
* single arm.
34+
*
35+
* The number of arms will be equal to the number of tuples passed
36+
* as arguments.
37+
*
38+
* @param tupleArgs A set tuples, each containing the parameters to initialize an arm.
39+
*/
40+
template <typename... TupleArgs>
41+
Model(TupleArgs... tupleArgs);
42+
43+
/**
44+
* @brief Basic constructor.
45+
*
46+
* This constructor initializes each arm from one of the tuples
47+
* contained by the parameter.
48+
*
49+
* The number of arms will be equal to the size of the input
50+
* vector.
51+
*
52+
* @param args The arguments with which to initialize the bandit arms.
53+
*/
54+
template <typename... Args>
55+
Model(std::vector<std::tuple<Args...>> args);
56+
57+
/**
58+
* @brief This function samples the specified bandit arm.
59+
*
60+
* @param a The arm to sample.
61+
*
62+
* @return A return sampled from the arm's underlying distribution.
63+
*/
64+
double sampleR(size_t a) const;
65+
66+
/**
67+
* @brief This function returns the number of arms of the bandit.
68+
*
69+
* @return The number of arms of the bandit.
70+
*/
71+
size_t getA() const;
72+
73+
/**
74+
* @brief This function returns a reference to the underlying arms.
75+
*
76+
* @return A vector containing the arms of the bandit.
77+
*/
78+
const std::vector<Dist> & getArms() const;
79+
80+
private:
81+
mutable std::vector<Dist> arms_;
82+
mutable AIToolbox::RandomEngine rand_;
83+
};
84+
85+
template <typename Dist>
86+
template <typename... TupleArgs>
87+
Model<Dist>::Model(TupleArgs... tupleArgs) :
88+
arms_({std::make_from_tuple<Dist>(std::move(tupleArgs))...}), rand_(AIToolbox::Impl::Seeder::getSeed())
89+
{}
90+
91+
template <typename Dist>
92+
template <typename... Args>
93+
Model<Dist>::Model(std::vector<std::tuple<Args...>> args) :
94+
rand_(AIToolbox::Impl::Seeder::getSeed())
95+
{
96+
arms_.reserve(args.size());
97+
98+
// Here we use a lambda to avoid having to static_cast the correct
99+
// emplace_back method on the vector.
100+
for (auto && t : args)
101+
std::apply([this](auto&&... params){arms_.emplace_back(std::move(params)...);}, std::move(t));
102+
}
103+
104+
template <typename Dist>
105+
double Model<Dist>::sampleR(const size_t a) const {
106+
return arms_[a](rand_);
107+
}
108+
109+
template <typename Dist>
110+
size_t Model<Dist>::getA() const { return arms_.size(); }
111+
112+
template <typename Dist>
113+
const std::vector<Dist> & Model<Dist>::getArms() const { return arms_; }
114+
}
115+
116+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#ifndef AI_TOOLBOX_FACTORED_BANDIT_FLATTENED_MODEL_HEADER_FILE
2+
#define AI_TOOLBOX_FACTORED_BANDIT_FLATTENED_MODEL_HEADER_FILE
3+
4+
#include <AIToolbox/Factored/Bandit/Model.hpp>
5+
6+
namespace AIToolbox::Factored::Bandit {
7+
/**
8+
* @brief This class flattens a factored bandit model.
9+
*
10+
* This class allows to flatten a factored bandit model back into its
11+
* equivalent single-agent multi-armed bandit. This class is simply a
12+
* wrapper, and does not copy nor store the original model. Instead, all
13+
* conversions between joint-actions and flattened actions are done
14+
* on-the-fly as needed.
15+
*
16+
* Note that flattening the problem makes it harder, as the new bandit has
17+
* an effective action space equal to the full product of all the agents'
18+
* actions in the original problem, and does not get access to the
19+
* structure of the factorization.
20+
*
21+
* @tparam Dist The distribution to use for all arms.
22+
*/
23+
template <typename Dist>
24+
class FlattenedModel {
25+
public:
26+
/**
27+
* @brief Basic constructor.
28+
*
29+
* @param model The factored multi-armed bandit to wrap.
30+
*/
31+
FlattenedModel(const Model<Dist> & model);
32+
33+
/**
34+
* @brief This function samples the specified bandit arm.
35+
*
36+
* This function converts the input action into its equivalent
37+
* joint-action for the wrapped model. It then returns the sum of
38+
* the obtained reward vector.
39+
*
40+
* @param a The arm to sample.
41+
*
42+
* @return The sampled reward for the selected arm.
43+
*/
44+
double sampleR(size_t a) const;
45+
46+
/**
47+
* @brief This function converts the input action to its equivalent joint-action.
48+
*
49+
* @param a The input action.
50+
*
51+
* @return The equivalent joint-action for the wrapped bandit.
52+
*/
53+
Action convertA(size_t a) const;
54+
55+
/**
56+
* @brief This function returns the number of arms of the bandit.
57+
*
58+
* This value is pre-computed, not computed on the fly, to keep
59+
* this function fast.
60+
*
61+
* @return The number of arms of the bandit.
62+
*/
63+
size_t getA() const;
64+
65+
/**
66+
* @brief This function returns a reference to the wrapped factored bandit.
67+
*
68+
* @return The wrapped factored bandit.
69+
*/
70+
const Model<Dist> & getModel() const;
71+
72+
private:
73+
const Model<Dist> & model_;
74+
75+
size_t A;
76+
mutable Action helper_;
77+
};
78+
79+
template <typename Dist>
80+
FlattenedModel<Dist>::FlattenedModel(const Model<Dist> & model) :
81+
model_(model), A(factorSpace(model.getA())), helper_(model.getA().size())
82+
{}
83+
84+
template <typename Dist>
85+
double FlattenedModel<Dist>::sampleR(size_t a) const {
86+
toFactors(model_.getA(), a, &helper_);
87+
return model_.sampleR(helper_).sum();
88+
}
89+
90+
template <typename Dist>
91+
size_t FlattenedModel<Dist>::getA() const { return A; }
92+
template <typename Dist>
93+
const Model<Dist> & FlattenedModel<Dist>::getModel() const { return model_; }
94+
}
95+
96+
#endif
+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#ifndef AI_TOOLBOX_FACTORED_BANDIT_MODEL_HEADER_FILE
2+
#define AI_TOOLBOX_FACTORED_BANDIT_MODEL_HEADER_FILE
3+
4+
#include <AIToolbox/Bandit/Model.hpp>
5+
#include <AIToolbox/Factored/Bandit/Types.hpp>
6+
#include <AIToolbox/Factored/Utils/Core.hpp>
7+
8+
namespace AIToolbox::Factored::Bandit {
9+
/**
10+
* @brief This class represents a factored multi-armed bandit.
11+
*
12+
* A factored multi-armed bandit is a specific bandit class, where the
13+
* reward function is factored into independent components, each of which
14+
* only depends on a subset of agents. The goal is generally to maximize
15+
* the sum of the rewards of all local arms.
16+
*
17+
* It effectively behaves as a collection of multi-armed bandits, aside
18+
* from the fact that the action each agent takes will be the same in all
19+
* bandits that it participates into. Each "local" bandit's effective
20+
* action will be the combination of all participating agents.
21+
*
22+
* This structure can make learning learning how to act much more
23+
* efficient, as exploiting the factorization allows to extract more
24+
* information from each joint action performed by the agents.
25+
*
26+
* @tparam Dist The distribution to use for all local arms.
27+
*/
28+
template <typename Dist>
29+
class Model {
30+
public:
31+
/**
32+
* @brief Basic constructor.
33+
*
34+
* This constructor creates the factored multi-armed bandit from a
35+
* set of standard bandits, each associated with a group of agents.
36+
*
37+
* Note that the action space of each bandit must be equal to the
38+
* product of the action spaces of all agents in its group. For
39+
* example, a bandit associated with agents with action spaces 2,
40+
* 3, 2 should have 12 arms in total.
41+
*
42+
* @param A The joint action space.
43+
* @param deps The agents associated with each bandit.
44+
* @param arms The local bandits to use.
45+
*/
46+
template <typename... TupleArgs>
47+
Model(Action A, std::vector<PartialKeys> deps, std::vector<AIToolbox::Bandit::Model<Dist>> arms);
48+
49+
/**
50+
* @brief This function samples the specified joint bandit arm.
51+
*
52+
* @param a The joint arm to sample.
53+
*
54+
* @return A vector containing the rewards of each local arm.
55+
*/
56+
Rewards sampleR(const Action & a) const;
57+
58+
/**
59+
* @brief This function returns the joint action space.
60+
*/
61+
const Action & getA() const;
62+
63+
/**
64+
* @brief This function returns a reference to the agent groupings.
65+
*/
66+
const std::vector<PartialKeys> & getGroups() const;
67+
68+
/**
69+
* @brief This function returns a reference to the internal local arms.
70+
*/
71+
const std::vector<AIToolbox::Bandit::Model<Dist>> & getArms() const;
72+
73+
private:
74+
Action A;
75+
std::vector<PartialKeys> groups_;
76+
77+
mutable std::vector<AIToolbox::Bandit::Model<Dist>> arms_;
78+
};
79+
80+
template <typename Dist>
81+
template <typename... TupleArgs>
82+
Model<Dist>::Model(Action a, std::vector<PartialKeys> deps, std::vector<AIToolbox::Bandit::Model<Dist>> arms) :
83+
A(std::move(a)), groups_(std::move(deps)), arms_(std::move(arms))
84+
{
85+
// Sanity checks
86+
//
87+
// - The number of groups is equal to the number of local arms.
88+
// - Each local arm has an action space equal to the product
89+
// of its participating agents.
90+
assert(groups_.size() == arms_.size());
91+
92+
for (size_t i = 0; i < groups_.size(); ++i) {
93+
const auto bSize = factorSpacePartial(groups_[i], A);
94+
(void)bSize;
95+
96+
assert(bSize == arms_[i].getA());
97+
}
98+
}
99+
100+
template <typename Dist>
101+
Rewards Model<Dist>::sampleR(const Action & a) const {
102+
Rewards rews(groups_.size());
103+
104+
for (size_t i = 0; i < groups_.size(); ++i) {
105+
const auto aid = toIndexPartial(groups_[i], A, a);
106+
107+
rews[i] = arms_[i].sampleR(aid);
108+
}
109+
110+
return rews;
111+
}
112+
113+
template <typename Dist>
114+
const Action & Model<Dist>::getA() const { return A; }
115+
template <typename Dist>
116+
const std::vector<PartialKeys> & Model<Dist>::getGroups() const { return groups_; }
117+
template <typename Dist>
118+
const std::vector<AIToolbox::Bandit::Model<Dist>> & Model<Dist>::getArms() const { return arms_; }
119+
}
120+
121+
#endif

include/AIToolbox/POMDP/Algorithms/LinearSupport.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ namespace AIToolbox::POMDP {
6363
* as the difference between two iterations is less than the
6464
* tolerance specified.
6565
*
66-
* @param h The horizon chosen.
66+
* @param horizon The horizon chosen.
6767
* @param tolerance The tolerance factor to stop the value iteration loop.
6868
*/
6969
LinearSupport(unsigned horizon, double tolerance);

0 commit comments

Comments
 (0)