Skip to content

Commit 1eb8f8a

Browse files
authored
[ML] Implements an absolute goodness-of-fit test to accept a change (#21)
This implements an absolute "goodness-of-fit" test for each change, by additionally testing a change versus its expected BIC given the residual distribution. It means we will only accept changes if they are a reasonably accurate description of the change currently occurring in the time series.
1 parent cf48634 commit 1eb8f8a

File tree

2 files changed

+136
-35
lines changed

2 files changed

+136
-35
lines changed

include/maths/CTimeSeriesChangeDetector.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
9999
const TPriorPtr &residualModel,
100100
core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR,
101101
core_t::TTime maximumTimeToDetect = core::constants::DAY,
102-
double minimumDeltaBicToDetect = 12.0);
102+
double minimumDeltaBicToDetect = 14.0);
103103

104104
//! Initialize by reading state from \p traverser.
105105
bool acceptRestoreTraverser(const SModelRestoreParams &params,
@@ -191,6 +191,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
191191
//! The BIC of applying the change.
192192
virtual double bic() const = 0;
193193

194+
//! The expected BIC of applying the change.
195+
virtual double expectedBic() const = 0;
196+
194197
//! Get a description of the change.
195198
virtual TOptionalChangeDescription change() const = 0;
196199

@@ -223,10 +226,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
223226

224227
//! Get the log-likelihood.
225228
double logLikelihood() const;
226-
227229
//! Update the data log-likelihood with \p logLikelihood.
228230
void addLogLikelihood(double logLikelihood);
229231

232+
//! Get the expected log-likelihood.
233+
double expectedLogLikelihood() const;
234+
//! Update the expected data log-likelihood with \p logLikelihood.
235+
void addExpectedLogLikelihood(double logLikelihood);
236+
230237
//! Get the time series trend model.
231238
const CTimeSeriesDecompositionInterface &trendModel() const;
232239
//! Get the time series trend model.
@@ -243,6 +250,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
243250
//! The likelihood of the data under this model.
244251
double m_LogLikelihood;
245252

253+
//! The expected log-likelihood of the data under this model.
254+
double m_ExpectedLogLikelihood;
255+
246256
//! A model decomposing the time series trend.
247257
TDecompositionPtr m_TrendModel;
248258

@@ -267,6 +277,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
267277
//! Returns the no change BIC.
268278
virtual double bic() const;
269279

280+
//! The expected BIC of applying the change.
281+
virtual double expectedBic() const;
282+
270283
//! Returns a null object.
271284
virtual TOptionalChangeDescription change() const;
272285

@@ -301,6 +314,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
301314
//! The BIC of applying the level shift.
302315
virtual double bic() const;
303316

317+
//! The expected BIC of applying the change.
318+
virtual double expectedBic() const;
319+
304320
//! Get a description of the level shift.
305321
virtual TOptionalChangeDescription change() const;
306322

@@ -350,6 +366,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod
350366
//! The BIC of applying the time shift.
351367
virtual double bic() const;
352368

369+
//! The expected BIC of applying the change.
370+
virtual double expectedBic() const;
371+
353372
//! Get a description of the time shift.
354373
virtual TOptionalChangeDescription change() const;
355374

lib/maths/CTimeSeriesChangeDetector.cc

Lines changed: 115 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <maths/CBasicStatisticsPersist.h>
2727
#include <maths/CChecksum.h>
2828
#include <maths/CPrior.h>
29+
#include <maths/CPriorDetail.h>
2930
#include <maths/CPriorStateSerialiser.h>
3031
#include <maths/CRestoreParams.h>
3132
#include <maths/CTimeSeriesModel.h>
@@ -50,7 +51,6 @@ using TDouble1Vec = core::CSmallVector<double, 1>;
5051
using TDouble4Vec = core::CSmallVector<double, 4>;
5152
using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
5253
using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription;
53-
5454
const std::string MINIMUM_TIME_TO_DETECT{"a"};
5555
const std::string MAXIMUM_TIME_TO_DETECT{"b"};
5656
const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"};
@@ -61,9 +61,12 @@ const std::string MIN_TIME_TAG{"g"};
6161
const std::string MAX_TIME_TAG{"h"};
6262
const std::string CHANGE_MODEL_TAG{"i"};
6363
const std::string LOG_LIKELIHOOD_TAG{"j"};
64-
const std::string SHIFT_TAG{"k"};
65-
const std::string TREND_MODEL_TAG{"l"};
66-
const std::string RESIDUAL_MODEL_TAG{"m"};
64+
const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"};
65+
const std::string SHIFT_TAG{"l"};
66+
const std::string TREND_MODEL_TAG{"m"};
67+
const std::string RESIDUAL_MODEL_TAG{"n"};
68+
const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u};
69+
const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9};
6770
}
6871

6972
SChangeDescription::SChangeDescription(EDescription description,
@@ -169,12 +172,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change()
169172

170173
double evidences[]{noChangeBic - candidates[0].first,
171174
noChangeBic - candidates[1].first};
172-
m_CurrentEvidenceOfChange = evidences[0];
173-
if ( evidences[0] > m_MinimumDeltaBicToDetect
174-
&& evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0)
175+
double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()};
176+
177+
double x[]{evidences[0] / m_MinimumDeltaBicToDetect,
178+
2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect,
179+
evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence,
180+
static_cast<double>(m_TimeRange.range() - m_MinimumTimeToDetect)
181+
/ static_cast<double>(m_MaximumTimeToDetect - m_MinimumTimeToDetect)};
182+
double p{ CTools::logisticFunction(x[0], 0.05, 1.0)
183+
* CTools::logisticFunction(x[1], 0.1, 1.0)
184+
* (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0))
185+
* (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))};
186+
LOG_TRACE("p = " << p);
187+
188+
if (p > 0.0625/*= std::pow(0.5, 4.0)*/)
175189
{
176190
return (*candidates[0].second)->change();
177191
}
192+
193+
m_CurrentEvidenceOfChange = evidences[0];
178194
}
179195
return TOptionalChangeDescription();
180196
}
@@ -236,9 +252,34 @@ namespace time_series_change_detector_detail
236252

237253
CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel,
238254
const TPriorPtr &residualModel) :
239-
m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel}
255+
m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0},
256+
m_TrendModel{trendModel}, m_ResidualModel{residualModel}
240257
{}
241258

259+
bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
260+
core::CStateRestoreTraverser &traverser)
261+
{
262+
do
263+
{
264+
const std::string name{traverser.name()};
265+
RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood);
266+
RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood);
267+
return true;
268+
}
269+
while (traverser.next());
270+
return true;
271+
}
272+
273+
void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
274+
{
275+
inserter.insertValue(LOG_LIKELIHOOD_TAG,
276+
m_LogLikelihood,
277+
core::CIEEE754::E_SinglePrecision);
278+
inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG,
279+
m_ExpectedLogLikelihood,
280+
core::CIEEE754::E_SinglePrecision);
281+
}
282+
242283
void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
243284
{
244285
// Note if the trend and residual models are shallow copied their
@@ -258,6 +299,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const
258299
uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const
259300
{
260301
seed = CChecksum::calculate(seed, m_LogLikelihood);
302+
seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood);
261303
seed = CChecksum::calculate(seed, m_TrendModel);
262304
return CChecksum::calculate(seed, m_ResidualModel);
263305
}
@@ -280,6 +322,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood)
280322
m_LogLikelihood += logLikelihood;
281323
}
282324

325+
double CUnivariateChangeModel::expectedLogLikelihood() const
326+
{
327+
return m_ExpectedLogLikelihood;
328+
}
329+
330+
void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood)
331+
{
332+
m_ExpectedLogLikelihood += logLikelihood;
333+
}
334+
283335
const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const
284336
{
285337
return *m_TrendModel;
@@ -310,31 +362,29 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren
310362
CUnivariateChangeModel{trendModel, residualModel}
311363
{}
312364

313-
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
365+
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &params,
314366
core::CStateRestoreTraverser &traverser)
315367
{
316-
do
317-
{
318-
const std::string name{traverser.name()};
319-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
320-
double logLikelihood,
321-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
322-
this->addLogLikelihood(logLikelihood))
323-
}
324-
while (traverser.next());
325-
return true;
368+
return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser);
326369
}
327370

328371
void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
329372
{
330-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
373+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
331374
}
332375

333376
double CUnivariateNoChangeModel::bic() const
334377
{
335378
return -2.0 * this->logLikelihood();
336379
}
337380

381+
double CUnivariateNoChangeModel::expectedBic() const
382+
{
383+
// This is irrelevant since this is only used for deciding
384+
// whether to accept a change.
385+
return this->bic();
386+
}
387+
338388
TOptionalChangeDescription CUnivariateNoChangeModel::change() const
339389
{
340390
return TOptionalChangeDescription();
@@ -357,7 +407,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count,
357407
samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0));
358408
}
359409

360-
double logLikelihood;
410+
double logLikelihood{0.0};
361411
if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights,
362412
logLikelihood) == maths_t::E_FpNoErrors)
363413
{
@@ -386,13 +436,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &
386436
bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
387437
core::CStateRestoreTraverser &traverser)
388438
{
439+
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
440+
{
441+
return false;
442+
}
389443
do
390444
{
391445
const std::string name{traverser.name()};
392-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
393-
double logLikelihood,
394-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
395-
this->addLogLikelihood(logLikelihood))
396446
RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value()))
397447
RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode)
398448
RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount)
@@ -404,7 +454,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam
404454

405455
void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
406456
{
407-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
457+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
408458
inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited());
409459
inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount);
410460
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
@@ -416,6 +466,11 @@ double CUnivariateLevelShiftModel::bic() const
416466
return -2.0 * this->logLikelihood() + std::log(m_SampleCount);
417467
}
418468

469+
double CUnivariateLevelShiftModel::expectedBic() const
470+
{
471+
return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount);
472+
}
473+
419474
TOptionalChangeDescription CUnivariateLevelShiftModel::change() const
420475
{
421476
// The "magic" 0.9 is due to the fact that the trend is updated
@@ -465,12 +520,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count,
465520
residualModel.addSamples(weightStyles, samples, weights);
466521
residualModel.propagateForwardsByTime(1.0);
467522

468-
double logLikelihood;
523+
double logLikelihood{0.0};
469524
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
470525
logLikelihood) == maths_t::E_FpNoErrors)
471526
{
472527
this->addLogLikelihood(logLikelihood);
473528
}
529+
for (const auto &weight : weights)
530+
{
531+
double expectedLogLikelihood{0.0};
532+
TDouble4Vec1Vec weight_{weight};
533+
if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
534+
residualModel, weightStyles, weight_},
535+
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
536+
expectedLogLikelihood, weightStyles, weight))
537+
{
538+
this->addExpectedLogLikelihood(expectedLogLikelihood);
539+
}
540+
}
474541
}
475542
}
476543

@@ -496,13 +563,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr
496563
bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
497564
core::CStateRestoreTraverser &traverser)
498565
{
566+
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
567+
{
568+
return false;
569+
}
499570
do
500571
{
501572
const std::string name{traverser.name()};
502-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
503-
double logLikelihood,
504-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
505-
this->addLogLikelihood(logLikelihood))
506573
RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser))
507574
}
508575
while (traverser.next());
@@ -511,7 +578,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams
511578

512579
void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
513580
{
514-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
581+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
515582
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
516583
boost::cref(this->residualModel()), _1));
517584
}
@@ -521,6 +588,11 @@ double CUnivariateTimeShiftModel::bic() const
521588
return -2.0 * this->logLikelihood();
522589
}
523590

591+
double CUnivariateTimeShiftModel::expectedBic() const
592+
{
593+
return -2.0 * this->expectedLogLikelihood();
594+
}
595+
524596
TOptionalChangeDescription CUnivariateTimeShiftModel::change() const
525597
{
526598
return SChangeDescription{SChangeDescription::E_TimeShift,
@@ -549,12 +621,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count,
549621
residualModel.addSamples(weightStyles, samples, weights);
550622
residualModel.propagateForwardsByTime(1.0);
551623

552-
double logLikelihood;
624+
double logLikelihood{0.0};
553625
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
554626
logLikelihood) == maths_t::E_FpNoErrors)
555627
{
556628
this->addLogLikelihood(logLikelihood);
557629
}
630+
for (const auto &weight : weights)
631+
{
632+
double expectedLogLikelihood{0.0};
633+
TDouble4Vec1Vec weight_{weight};
634+
residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
635+
residualModel, weightStyles, weight_},
636+
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
637+
expectedLogLikelihood, weightStyles, weight);
638+
this->addExpectedLogLikelihood(expectedLogLikelihood);
639+
}
558640
}
559641
}
560642

0 commit comments

Comments
 (0)