Skip to content

Commit ec7bcf4

Browse files
committed
Improvements to trend modelling and periodicity testing for forecasting (#7)
This is a merge of a feature branch for issue #5.
1 parent 3e2f7e2 commit ec7bcf4

File tree

106 files changed

+13887
-7918
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+13887
-7918
lines changed

include/maths/CAdaptiveBucketing.h

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,10 @@ namespace maths
7171
class MATHS_EXPORT CAdaptiveBucketing
7272
{
7373
public:
74-
typedef std::vector<double> TDoubleVec;
75-
typedef std::vector<CFloatStorage> TFloatVec;
76-
typedef std::pair<core_t::TTime, core_t::TTime> TTimeTimePr;
77-
typedef CBasicStatistics::SSampleMeanVar<double>::TAccumulator TDoubleMeanVarAccumulator;
78-
typedef std::pair<TTimeTimePr, TDoubleMeanVarAccumulator> TTimeTimePrMeanVarPr;
79-
typedef std::vector<TTimeTimePrMeanVarPr> TTimeTimePrMeanVarPrVec;
74+
using TDoubleVec = std::vector<double>;
75+
using TFloatVec = std::vector<CFloatStorage>;
76+
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
77+
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
8078

8179
public:
8280
//! Restore by traversing a state document
@@ -107,14 +105,17 @@ class MATHS_EXPORT CAdaptiveBucketing
107105
//! \param[in] n The number of buckets.
108106
bool initialize(double a, double b, std::size_t n);
109107

110-
//! Add the function moments \f$([a_i,b_i], S_i)\f$ where
111-
//! \f$S_i\f$ are the means and variances of the function
112-
//! in the time intervals \f$([a_i,b_i])\f$.
108+
//! Add the function mean values \f$([a_i,b_i], m_i)\f$ where
109+
//! \f$m_i\f$ are the means of the function in the time intervals
110+
//! \f$([a+(i-1)l,a+il])\f$, \f$i\in[n]\f$ and \f$l=(b-a)/n\f$.
113111
//!
114-
//! \param[in] time The start of the period including \p values.
115-
//! \param[in] values Time ranges and the corresponding function
116-
//! value moments.
117-
void initialValues(core_t::TTime time, const TTimeTimePrMeanVarPrVec &values);
112+
//! \param[in] startTime The start of the period.
113+
//! \param[in] endTime The end of the period.
114+
//! \param[in] values The mean values in a regular subdivision
115+
//! of [\p startTime,\p endTime].
116+
void initialValues(core_t::TTime startTime,
117+
core_t::TTime endTime,
118+
const TFloatMeanAccumulatorVec &values);
118119

119120
//! Get the number of buckets.
120121
std::size_t size(void) const;
@@ -195,21 +196,18 @@ class MATHS_EXPORT CAdaptiveBucketing
195196
//! Get the memory used by this component
196197
std::size_t memoryUsage(void) const;
197198

198-
private:
199-
typedef CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator TFloatMeanAccumulator;
200-
201199
private:
202200
//! Compute the values corresponding to the change in end
203201
//! points from \p endpoints. The values are assigned based
204202
//! on their intersection with each bucket in the previous
205203
//! bucket configuration.
206204
virtual void refresh(const TFloatVec &endpoints) = 0;
207205

206+
//! Check if \p time is in this component's window.
207+
virtual bool inWindow(core_t::TTime time) const = 0;
208+
208209
//! Add the function value at \p time.
209-
virtual void add(std::size_t bucket,
210-
core_t::TTime time,
211-
double offset,
212-
const TDoubleMeanVarAccumulator &value) = 0;
210+
virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight) = 0;
213211

214212
//! Get the offset w.r.t. the start of the bucketing of \p time.
215213
virtual double offset(core_t::TTime time) const = 0;

include/maths/CBasicStatistics.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,23 @@ class MATHS_EXPORT CBasicStatistics
7676
//! Compute the sample median.
7777
static double median(const TDoubleVec &dataIn);
7878

79+
//! Compute the maximum of \p first, \p second and \p third.
80+
template<typename T>
81+
static T max(T first, T second, T third)
82+
{
83+
return first >= second ?
84+
(third >= first ? third : first) :
85+
(third >= second ? third : second);
86+
}
87+
88+
//! Compute the minimum of \p first, \p second and \p third.
89+
template<typename T>
90+
static T min(T first, T second, T third)
91+
{
92+
return first <= second ?
93+
(third <= first ? third : first) :
94+
(third <= second ? third : second);
95+
}
7996

8097
/////////////////////////// ACCUMULATORS ///////////////////////////
8198

@@ -1611,6 +1628,12 @@ class MATHS_EXPORT CBasicStatistics
16111628
return m_Max[0];
16121629
}
16131630

1631+
//! Get the range.
1632+
T range(void) const
1633+
{
1634+
return m_Max[0] - m_Min[0];
1635+
}
1636+
16141637
//! Get the margin by which all the values have the same sign.
16151638
T signMargin(void) const
16161639
{

include/maths/CCalendarComponentAdaptiveBucketing.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ class CSeasonalTime;
3838
class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucketing
3939
{
4040
public:
41-
typedef CAdaptiveBucketing::TTimeTimePrMeanVarPrVec TTimeTimePrMeanVarPrVec;
42-
typedef CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator TFloatMeanVarAccumulator;
41+
using TFloatMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator;
4342

4443
public:
4544
CCalendarComponentAdaptiveBucketing(void);
@@ -151,7 +150,7 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
151150
//@}
152151

153152
private:
154-
typedef std::vector<TFloatMeanVarAccumulator> TFloatMeanVarVec;
153+
using TFloatMeanVarVec = std::vector<TFloatMeanVarAccumulator>;
155154

156155
private:
157156
//! Restore by traversing a state document
@@ -165,11 +164,11 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
165164
//! \param[in] endpoints The old end points.
166165
void refresh(const TFloatVec &endpoints);
167166

167+
//! Check if \p time is in this component's window.
168+
virtual bool inWindow(core_t::TTime time) const;
169+
168170
//! Add the function value to \p bucket.
169-
virtual void add(std::size_t bucket,
170-
core_t::TTime time,
171-
double offset,
172-
const TDoubleMeanVarAccumulator &value);
171+
virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight);
173172

174173
//! Get the offset w.r.t. the start of the bucketing of \p time.
175174
virtual double offset(core_t::TTime time) const;

include/maths/CDecompositionComponent.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ namespace maths
3434
class MATHS_EXPORT CDecompositionComponent
3535
{
3636
public:
37-
typedef maths_t::TDoubleDoublePr TDoubleDoublePr;
38-
typedef std::vector<double> TDoubleVec;
39-
typedef std::vector<CFloatStorage> TFloatVec;
40-
typedef CSpline<boost::reference_wrapper<const TFloatVec>,
41-
boost::reference_wrapper<const TFloatVec>,
42-
boost::reference_wrapper<const TDoubleVec> > TSplineCRef;
43-
typedef CSpline<boost::reference_wrapper<TFloatVec>,
44-
boost::reference_wrapper<TFloatVec>,
45-
boost::reference_wrapper<TDoubleVec> > TSplineRef;
37+
using TDoubleDoublePr = maths_t::TDoubleDoublePr;
38+
using TDoubleVec = std::vector<double>;
39+
using TFloatVec = std::vector<CFloatStorage>;
40+
using TSplineCRef = CSpline<boost::reference_wrapper<const TFloatVec>,
41+
boost::reference_wrapper<const TFloatVec>,
42+
boost::reference_wrapper<const TDoubleVec>>;
43+
using TSplineRef = CSpline<boost::reference_wrapper<TFloatVec>,
44+
boost::reference_wrapper<TFloatVec>,
45+
boost::reference_wrapper<TDoubleVec>>;
4646

4747
public:
4848
//! Persist state by passing information to \p inserter.
@@ -63,9 +63,9 @@ class MATHS_EXPORT CDecompositionComponent
6363
};
6464

6565
public:
66-
typedef boost::array<CSplineTypes::EType, 2> TTypeArray;
67-
typedef boost::array<TFloatVec, 2> TFloatVecArray;
68-
typedef boost::array<TDoubleVec, 2> TDoubleVecArray;
66+
using TTypeArray = boost::array<CSplineTypes::EType, 2>;
67+
using TFloatVecArray = boost::array<TFloatVec, 2>;
68+
using TDoubleVecArray = boost::array<TDoubleVec, 2>;
6969

7070
public:
7171
CPackedSplines(CSplineTypes::EType valueInterpolationType,

include/maths/CExpandingWindow.h

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#ifndef INCLUDED_ml_maths_CExpandingWindow_h
8+
#define INCLUDED_ml_maths_CExpandingWindow_h
9+
10+
#include <core/CFloatStorage.h>
11+
#include <core/CoreTypes.h>
12+
#include <core/CVectorRange.h>
13+
14+
#include <maths/CBasicStatistics.h>
15+
#include <maths/ImportExport.h>
16+
17+
#include <cstddef>
18+
#include <functional>
19+
#include <vector>
20+
21+
namespace ml
22+
{
23+
namespace core
24+
{
25+
class CStatePersistInserter;
26+
class CStateRestoreTraverser;
27+
}
28+
29+
namespace maths
30+
{
31+
32+
//! \brief Implements a fixed memory expanding time window.
33+
//!
34+
//! DESCRIPTION:\n
35+
//! As the window expands it compresses by merging adjacent values
36+
//! and maintaining means of merged values. It cycles through a
37+
//! sequence of increasing compression factors, which are determined
38+
//! by a sequence of increasing bucketing lengths supplied to the
39+
//! constructor. At the point it overflows, i.e. time since the
40+
//! beginning of the window exceeds "size" x "maximum bucket length",
41+
//! it will re-initialize the bucketing and update the start time.
42+
class MATHS_EXPORT CExpandingWindow
43+
{
44+
public:
45+
using TDoubleVec = std::vector<double>;
46+
using TTimeVec = std::vector<core_t::TTime>;
47+
using TTimeCRng = core::CVectorRange<const TTimeVec>;
48+
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
49+
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
50+
using TPredictor = std::function<double (core_t::TTime)>;
51+
52+
public:
53+
CExpandingWindow(core_t::TTime bucketLength,
54+
TTimeCRng bucketLengths,
55+
std::size_t size,
56+
double decayRate = 0.0);
57+
58+
//! Initialize by reading state from \p traverser.
59+
bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
60+
61+
//! Persist state by passing information to \p inserter.
62+
void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
63+
64+
//! Get the start time of the sketch.
65+
core_t::TTime startTime() const;
66+
67+
//! Get the end time of the window.
68+
core_t::TTime endTime() const;
69+
70+
//! Get the current bucket length.
71+
core_t::TTime bucketLength() const;
72+
73+
//! Get the bucket values.
74+
const TFloatMeanAccumulatorVec &values() const;
75+
76+
//! Get the bucket values minus the values from \p trend.
77+
TFloatMeanAccumulatorVec valuesMinusPrediction(const TPredictor &predictor) const;
78+
79+
//! Set the start time to \p time.
80+
void initialize(core_t::TTime time);
81+
82+
//! Age the bucket values to account for \p time elapsed time.
83+
void propagateForwardsByTime(double time);
84+
85+
//! Add \p value at \p time.
86+
void add(core_t::TTime time, double value, double weight = 1.0);
87+
88+
//! Check if we need to compress by increasing the bucket span.
89+
bool needToCompress(core_t::TTime time) const;
90+
91+
//! Get a checksum for this object.
92+
uint64_t checksum(uint64_t seed = 0) const;
93+
94+
//! Debug the memory used by this object.
95+
void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
96+
97+
//! Get the memory used by this object.
98+
std::size_t memoryUsage() const;
99+
100+
private:
101+
//! The rate at which the bucket values are aged.
102+
double m_DecayRate;
103+
104+
//! The data bucketing length.
105+
core_t::TTime m_BucketLength;
106+
107+
//! The bucket lengths to test.
108+
TTimeCRng m_BucketLengths;
109+
110+
//! The index in m_BucketLengths of the current bucketing interval.
111+
std::size_t m_BucketLengthIndex;
112+
113+
//! The time of the first data point.
114+
core_t::TTime m_StartTime;
115+
116+
//! The bucket values.
117+
TFloatMeanAccumulatorVec m_BucketValues;
118+
119+
//! The mean value time modulo the data bucketing length.
120+
TFloatMeanAccumulator m_MeanOffset;
121+
};
122+
123+
}
124+
}
125+
126+
#endif // INCLUDED_ml_maths_CExpandingWindow_h

include/maths/CGammaRateConjugate.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,19 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
7676
//! \param[in] priorShape The shape parameter of the gamma prior.
7777
//! \param[in] priorRate The rate parameter of the gamma prior.
7878
//! \param[in] decayRate The rate at which to revert to non-informative.
79+
//! \param[in] offsetMargin The margin between the smallest value and the support
80+
//! left end.
7981
CGammaRateConjugate(maths_t::EDataType dataType,
8082
double offset,
8183
double priorShape,
8284
double priorRate,
83-
double decayRate = 0.0);
85+
double decayRate = 0.0,
86+
double offsetMargin = GAMMA_OFFSET_MARGIN);
8487

8588
//! Construct by traversing a state document.
8689
CGammaRateConjugate(const SDistributionRestoreParams &params,
87-
core::CStateRestoreTraverser &traverser);
90+
core::CStateRestoreTraverser &traverser,
91+
double offsetMargin = GAMMA_OFFSET_MARGIN);
8892

8993
// Default copy constructor and assignment operator work.
9094

@@ -94,10 +98,13 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
9498
//! for details).
9599
//! \param[in] offset The offset to apply to the data.
96100
//! \param[in] decayRate The rate at which to revert to the non-informative prior.
101+
//! \param[in] offsetMargin The margin between the smallest value and the support
102+
//! left end.
97103
//! \return A non-informative prior.
98104
static CGammaRateConjugate nonInformativePrior(maths_t::EDataType dataType,
99105
double offset = 0.0,
100-
double decayRate = 0.0);
106+
double decayRate = 0.0,
107+
double offsetMargin = GAMMA_OFFSET_MARGIN);
101108
//@}
102109

103110
//! \name Prior Contract
@@ -114,7 +121,12 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
114121
//! Reset the prior to non-informative.
115122
virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
116123

117-
//! Returns false.
124+
//! Get the margin between the smallest value and the support left
125+
//! end. Priors with non-negative support, automatically adjust the
126+
//! offset if a value is seen which is smaller than offset + margin.
127+
virtual double offsetMargin(void) const;
128+
129+
//! Returns true.
118130
virtual bool needsOffset(void) const;
119131

120132
//! Reset m_Offset so the smallest sample is not within some minimum
@@ -390,6 +402,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
390402
//! us to model data with negative values greater than \f$-u\f$.
391403
double m_Offset;
392404

405+
//! The margin between the smallest value and the support left end.
406+
double m_OffsetMargin;
407+
393408
//! The maximum likelihood estimate of the shape parameter.
394409
double m_LikelihoodShape;
395410

0 commit comments

Comments
 (0)