 #define INCLUDED_ml_maths_CBoostedTreeLoss_h

 #include <maths/CBasicStatistics.h>
+#include <maths/CKMeansOnline.h>
 #include <maths/CLinearAlgebra.h>
 #include <maths/CLinearAlgebraEigen.h>
+#include <maths/CPRNG.h>
 #include <maths/ImportExport.h>
 #include <maths/MathsTypes.h>
@@ -66,9 +68,26 @@ class MATHS_EXPORT CArgMinMseImpl final : public CArgMinLossImpl {
 //! \brief Finds the value to add to a set of predicted log-odds which minimises
 //! regularised cross entropy loss w.r.t. the actual categories.
-class MATHS_EXPORT CArgMinLogisticImpl final : public CArgMinLossImpl {
+//!
+//! DESCRIPTION:\n
+//! We want to find the weight which minimizes the log-loss, i.e. which satisfies
+//! <pre class="fragment">
+//! \f$\displaystyle arg\min_w{ \lambda w^2 - \sum_i{ a_i \log(S(p_i + w)) + (1 - a_i) \log(1 - S(p_i + w)) } }\f$
+//! </pre>
+//!
+//! Rather than working with this function directly we bucket the predictions
+//! \f$p_i\f$ in a first pass over the data and compute the weight which minimizes
+//! the approximate function
+//! <pre class="fragment">
+//! \f$\displaystyle arg\min_w{ \lambda w^2 - \sum_B{ c_{1,B} \log(S(\bar{p}_B + w)) + c_{0,B} \log(1 - S(\bar{p}_B + w)) } }\f$
+//! </pre>
+//!
+//! Here, \f$B\f$ ranges over the buckets, \f$\bar{p}_B\f$ denotes the B'th bucket
+//! centre and \f$c_{0,B}\f$ and \f$c_{1,B}\f$ denote the counts of actual classes
+//! 0 and 1, respectively, in the bucket \f$B\f$.
+class MATHS_EXPORT CArgMinBinomialLogisticLossImpl final : public CArgMinLossImpl {
 public:
-    CArgMinLogisticImpl(double lambda);
+    CArgMinBinomialLogisticLossImpl(double lambda);
     std::unique_ptr<CArgMinLossImpl> clone() const override;
     bool nextPass() override;
     void add(const TMemoryMappedFloatVector& prediction, double actual, double weight = 1.0) override;
@@ -80,11 +99,13 @@ class MATHS_EXPORT CArgMinLogisticImpl final : public CArgMinLossImpl {
     using TDoubleVector2x1 = CVectorNx1<double, 2>;
     using TDoubleVector2x1Vec = std::vector<TDoubleVector2x1>;

+private:
+    static constexpr std::size_t NUMBER_BUCKETS = 128;
+
 private:
     std::size_t bucket(double prediction) const {
         double bucket{(prediction - m_PredictionMinMax.min()) / this->bucketWidth()};
-        return std::min(static_cast<std::size_t>(bucket),
-                        m_BucketCategoryCounts.size() - 1);
+        return std::min(static_cast<std::size_t>(bucket), m_BucketsClassCounts.size() - 1);
     }

     double bucketCentre(std::size_t bucket) const {
@@ -95,15 +116,74 @@ class MATHS_EXPORT CArgMinLogisticImpl final : public CArgMinLossImpl {
     double bucketWidth() const {
         return m_PredictionMinMax.initialized()
                    ? m_PredictionMinMax.range() /
-                          static_cast<double>(m_BucketCategoryCounts.size())
+                          static_cast<double>(m_BucketsClassCounts.size())
                    : 0.0;
     }

 private:
     std::size_t m_CurrentPass = 0;
     TMinMaxAccumulator m_PredictionMinMax;
-    TDoubleVector2x1 m_CategoryCounts;
-    TDoubleVector2x1Vec m_BucketCategoryCounts;
+    TDoubleVector2x1 m_ClassCounts;
+    TDoubleVector2x1Vec m_BucketsClassCounts;
+};
+
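For intuition, the bucketed objective described in the comment above can be written down in a few lines. This is a minimal sketch under assumed names (logistic, bucketedLogLoss), not the library's implementation, which lives in CBoostedTreeLoss.cc and also tracks curvature and pass state:

    #include <array>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // The logistic function S(x) = 1 / (1 + exp(-x)).
    double logistic(double x) {
        return 1.0 / (1.0 + std::exp(-x));
    }

    // Approximate regularised log-loss as a function of the candidate weight w.
    // bucketCentres[B] is the centre of bucket B and bucketCounts[B] holds the
    // counts {c_{0,B}, c_{1,B}} of actual classes 0 and 1 in that bucket.
    double bucketedLogLoss(double w,
                           double lambda,
                           const std::vector<double>& bucketCentres,
                           const std::vector<std::array<double, 2>>& bucketCounts) {
        double loss{lambda * w * w};
        for (std::size_t b = 0; b < bucketCentres.size(); ++b) {
            double p1{logistic(bucketCentres[b] + w)};
            loss -= bucketCounts[b][1] * std::log(p1) +
                    bucketCounts[b][0] * std::log(1.0 - p1);
        }
        return loss;
    }

Minimizing this one-dimensional function over w, for example by line search, only needs the NUMBER_BUCKETS bucket statistics rather than a second pass over every prediction.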
+//! \brief Finds the value to add to a set of predicted multinomial logits which
+//! minimises regularised cross entropy loss w.r.t. the actual classes.
+//!
+//! DESCRIPTION:\n
+//! We want to find the weight which minimizes the log-loss, i.e. which satisfies
+//! <pre class="fragment">
+//! \f$\displaystyle arg\min_w{ \lambda \|w\|^2 - \sum_i{ \log([softmax(p_i + w)]_{a_i}) } }\f$
+//! </pre>
+//!
+//! Here, \f$a_i\f$ is the index of the i'th example's true class. Rather than
+//! working with this function directly we approximate it by the means and counts
+//! of predictions in a partition of the original data, i.e. we compute the weight
+//! which satisfies
+//! <pre class="fragment">
+//! \f$\displaystyle arg\min_w{ \lambda \|w\|^2 - \sum_P{ \sum_j{ c_{j,P} \log([softmax(\bar{p}_P + w)]_j) } } }\f$
+//! </pre>
+//!
+//! Here, \f$P\f$ ranges over the subsets of the partition, \f$\bar{p}_P\f$ denotes
+//! the mean of the predictions in the P'th subset and \f$c_{j,P}\f$ denotes the
+//! count of class \f$j\f$ in the subset \f$P\f$. We compute this partition by
+//! k-means.
+class MATHS_EXPORT CArgMinMultinomialLogisticLossImpl final : public CArgMinLossImpl {
+public:
+    using TObjective = std::function<double(const TDoubleVector&)>;
+    using TObjectiveGradient = std::function<TDoubleVector(const TDoubleVector&)>;
+
+public:
+    CArgMinMultinomialLogisticLossImpl(std::size_t numberClasses,
+                                       double lambda,
+                                       const CPRNG::CXorOShiro128Plus& rng);
+    std::unique_ptr<CArgMinLossImpl> clone() const override;
+    bool nextPass() override;
+    void add(const TMemoryMappedFloatVector& prediction, double actual, double weight = 1.0) override;
+    void merge(const CArgMinLossImpl& other) override;
+    TDoubleVector value() const override;
+
+    // Exposed for unit testing.
+    TObjective objective() const;
+    TObjectiveGradient objectiveGradient() const;
+
+private:
+    using TDoubleVectorVec = std::vector<TDoubleVector>;
+    using TKMeans = CKMeansOnline<TDoubleVector>;
+
+private:
+    static constexpr std::size_t NUMBER_CENTRES = 128;
+    static constexpr std::size_t NUMBER_RESTARTS = 5;
+
+private:
+    std::size_t m_NumberClasses = 0;
+    std::size_t m_CurrentPass = 0;
+    mutable CPRNG::CXorOShiro128Plus m_Rng;
+    TDoubleVector m_ClassCounts;
+    TDoubleVector m_DoublePrediction;
+    TKMeans m_PredictionSketch;
+    TDoubleVectorVec m_Centres;
+    TDoubleVectorVec m_CentresClassCounts;
 };
 }
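To make the k-means approximation concrete, here is a hedged sketch of the clustered objective. The helper names (softmax, clusteredLogLoss) are assumptions; in the production code the TObjective above is built from m_Centres and m_CentresClassCounts and minimized with several random restarts:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // softmax(p)[j] = exp(p_j) / sum_k exp(p_k), shifted by max(p) for stability.
    std::vector<double> softmax(std::vector<double> p) {
        double pmax{*std::max_element(p.begin(), p.end())};
        double normalizer{0.0};
        for (auto& pj : p) {
            pj = std::exp(pj - pmax);
            normalizer += pj;
        }
        for (auto& pj : p) {
            pj /= normalizer;
        }
        return p;
    }

    // Approximate regularised log-loss as a function of the weight vector w.
    // centres[P] is the mean prediction vector of the P'th cluster and
    // classCounts[P][j] is c_{j,P}, the count of class j in that cluster.
    double clusteredLogLoss(const std::vector<double>& w,
                            double lambda,
                            const std::vector<std::vector<double>>& centres,
                            const std::vector<std::vector<double>>& classCounts) {
        double loss{0.0};
        for (double wj : w) {
            loss += lambda * wj * wj;
        }
        for (std::size_t P = 0; P < centres.size(); ++P) {
            std::vector<double> shifted(centres[P]);
            for (std::size_t j = 0; j < w.size(); ++j) {
                shifted[j] += w[j];
            }
            auto probabilities = softmax(std::move(shifted));
            for (std::size_t j = 0; j < w.size(); ++j) {
                loss -= classCounts[P][j] * std::log(probabilities[j]);
            }
        }
        return loss;
    }

Because each evaluation touches only NUMBER_CENTRES cluster summaries, the NUMBER_RESTARTS random restarts seeded from m_Rng stay cheap regardless of the data set size.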
109
189
@@ -185,7 +265,8 @@ class MATHS_EXPORT CLoss {
     //! Transforms a prediction from the forest to the target space.
     virtual TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const = 0;
     //! Get an object which computes the leaf value that minimises loss.
-    virtual CArgMinLoss minimizer(double lambda) const = 0;
+    virtual CArgMinLoss minimizer(double lambda,
+                                  const CPRNG::CXorOShiro128Plus& rng) const = 0;
     //! Get the name of the loss function.
     virtual const std::string& name() const = 0;
@@ -214,7 +295,7 @@ class MATHS_EXPORT CMse final : public CLoss {
                  double weight = 1.0) const override;
     bool isCurvatureConstant() const override;
     TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
-    CArgMinLoss minimizer(double lambda) const override;
+    CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
     const std::string& name() const override;
 };
@@ -227,11 +308,47 @@ class MATHS_EXPORT CMse final : public CLoss {
 //! </pre>
 //! where \f$a_i\f$ denotes the actual class of the i'th example, \f$p\f$ is the
 //! prediction and \f$S(\cdot)\f$ denotes the logistic function.
-class MATHS_EXPORT CBinomialLogistic final : public CLoss {
+class MATHS_EXPORT CBinomialLogisticLoss final : public CLoss {
+public:
+    static const std::string NAME;
+
+public:
+    std::unique_ptr<CLoss> clone() const override;
+    std::size_t numberParameters() const override;
+    double value(const TMemoryMappedFloatVector& prediction,
+                 double actual,
+                 double weight = 1.0) const override;
+    void gradient(const TMemoryMappedFloatVector& prediction,
+                  double actual,
+                  TWriter writer,
+                  double weight = 1.0) const override;
+    void curvature(const TMemoryMappedFloatVector& prediction,
+                   double actual,
+                   TWriter writer,
+                   double weight = 1.0) const override;
+    bool isCurvatureConstant() const override;
+    TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
+    CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
+    const std::string& name() const override;
+};
+
+//! \brief Implements loss for multinomial logistic regression.
+//!
+//! DESCRIPTION:\n
+//! This targets the cross-entropy loss using the forest to predict the class
+//! probabilities via the softmax function:
+//! <pre class="fragment">
+//! \f$\displaystyle l_i(p) = -\sum_j{ a_{ij} \log([\sigma(p)]_j) }\f$
+//! </pre>
+//! where \f$a_{ij}\f$ is one if the actual class of the i'th example is \f$j\f$ and
+//! zero otherwise, \f$p\f$ denotes the vector valued prediction and \f$\sigma(p)\f$
+//! is the softmax function, i.e. \f$[\sigma(p)]_j = \frac{e^{p_j}}{\sum_k e^{p_k}}\f$.
+class MATHS_EXPORT CMultinomialLogisticLoss final : public CLoss {
 public:
     static const std::string NAME;

 public:
+    CMultinomialLogisticLoss(std::size_t numberClasses);
     std::unique_ptr<CLoss> clone() const override;
     std::size_t numberParameters() const override;
     double value(const TMemoryMappedFloatVector& prediction,
@@ -247,8 +364,11 @@ class MATHS_EXPORT CBinomialLogistic final : public CLoss {
                  double weight = 1.0) const override;
     bool isCurvatureConstant() const override;
     TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
-    CArgMinLoss minimizer(double lambda) const override;
+    CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
     const std::string& name() const override;
+
+private:
+    std::size_t m_NumberClasses;
 };
 }
 }
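Finally, the per-example loss in the CMultinomialLogisticLoss comment reduces to minus the log of the predicted probability of the true class. A self-contained sketch using the log-sum-exp trick (multinomialLogLoss is a hypothetical name, not the class's value method):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // l_i(p) = -log([softmax(p)]_{a_i}) = log(sum_k exp(p_k)) - p_{a_i},
    // evaluated via log-sum-exp so large logits do not overflow.
    double multinomialLogLoss(const std::vector<double>& p, std::size_t actualClass) {
        double pmax{*std::max_element(p.begin(), p.end())};
        double sum{0.0};
        for (double pk : p) {
            sum += std::exp(pk - pmax);
        }
        return pmax + std::log(sum) - p[actualClass];
    }

With two classes and predictions (0, p) this reduces to the binomial cross entropy above, since softmax of (0, p) gives the logistic function S(p) for class 1.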