Skip to content

Commit 447f31a

Browse files
authored
[ML] Implements an absolute goodness-of-fit test to accept a change (#21)
This implements an absolute "goodness-of-fit" test for each change, by additionally testing a change versus its expected BIC given the residual distribution. It means we will only accept changes if they are a reasonably accurate description of the change currently occurring in the time series.
1 parent 893f0b2 commit 447f31a

File tree

2 files changed

+136
-35
lines changed

2 files changed

+136
-35
lines changed

include/maths/CTimeSeriesChangeDetector.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
9090
const TPriorPtr &residualModel,
9191
core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR,
9292
core_t::TTime maximumTimeToDetect = core::constants::DAY,
93-
double minimumDeltaBicToDetect = 12.0);
93+
double minimumDeltaBicToDetect = 14.0);
9494

9595
//! Initialize by reading state from \p traverser.
9696
bool acceptRestoreTraverser(const SModelRestoreParams &params,
@@ -182,6 +182,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
182182
//! The BIC of applying the change.
183183
virtual double bic() const = 0;
184184

185+
//! The expected BIC of applying the change.
186+
virtual double expectedBic() const = 0;
187+
185188
//! Get a description of the change.
186189
virtual TOptionalChangeDescription change() const = 0;
187190

@@ -214,10 +217,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
214217

215218
//! Get the log-likelihood.
216219
double logLikelihood() const;
217-
218220
//! Update the data log-likelihood with \p logLikelihood.
219221
void addLogLikelihood(double logLikelihood);
220222

223+
//! Get the expected log-likelihood.
224+
double expectedLogLikelihood() const;
225+
//! Update the expected data log-likelihood with \p logLikelihood.
226+
void addExpectedLogLikelihood(double logLikelihood);
227+
221228
//! Get the time series trend model.
222229
const CTimeSeriesDecompositionInterface &trendModel() const;
223230
//! Get the time series trend model.
@@ -234,6 +241,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
234241
//! The likelihood of the data under this model.
235242
double m_LogLikelihood;
236243

244+
//! The expected log-likelihood of the data under this model.
245+
double m_ExpectedLogLikelihood;
246+
237247
//! A model decomposing the time series trend.
238248
TDecompositionPtr m_TrendModel;
239249

@@ -258,6 +268,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
258268
//! Returns the no change BIC.
259269
virtual double bic() const;
260270

271+
//! The expected BIC of applying the change.
272+
virtual double expectedBic() const;
273+
261274
//! Returns a null object.
262275
virtual TOptionalChangeDescription change() const;
263276

@@ -292,6 +305,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
292305
//! The BIC of applying the level shift.
293306
virtual double bic() const;
294307

308+
//! The expected BIC of applying the change.
309+
virtual double expectedBic() const;
310+
295311
//! Get a description of the level shift.
296312
virtual TOptionalChangeDescription change() const;
297313

@@ -341,6 +357,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod
341357
//! The BIC of applying the time shift.
342358
virtual double bic() const;
343359

360+
//! The expected BIC of applying the change.
361+
virtual double expectedBic() const;
362+
344363
//! Get a description of the time shift.
345364
virtual TOptionalChangeDescription change() const;
346365

lib/maths/CTimeSeriesChangeDetector.cc

Lines changed: 115 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <maths/CBasicStatisticsPersist.h>
1818
#include <maths/CChecksum.h>
1919
#include <maths/CPrior.h>
20+
#include <maths/CPriorDetail.h>
2021
#include <maths/CPriorStateSerialiser.h>
2122
#include <maths/CRestoreParams.h>
2223
#include <maths/CTimeSeriesModel.h>
@@ -41,7 +42,6 @@ using TDouble1Vec = core::CSmallVector<double, 1>;
4142
using TDouble4Vec = core::CSmallVector<double, 4>;
4243
using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
4344
using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription;
44-
4545
const std::string MINIMUM_TIME_TO_DETECT{"a"};
4646
const std::string MAXIMUM_TIME_TO_DETECT{"b"};
4747
const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"};
@@ -52,9 +52,12 @@ const std::string MIN_TIME_TAG{"g"};
5252
const std::string MAX_TIME_TAG{"h"};
5353
const std::string CHANGE_MODEL_TAG{"i"};
5454
const std::string LOG_LIKELIHOOD_TAG{"j"};
55-
const std::string SHIFT_TAG{"k"};
56-
const std::string TREND_MODEL_TAG{"l"};
57-
const std::string RESIDUAL_MODEL_TAG{"m"};
55+
const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"};
56+
const std::string SHIFT_TAG{"l"};
57+
const std::string TREND_MODEL_TAG{"m"};
58+
const std::string RESIDUAL_MODEL_TAG{"n"};
59+
const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u};
60+
const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9};
5861
}
5962

6063
SChangeDescription::SChangeDescription(EDescription description,
@@ -160,12 +163,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change()
160163

161164
double evidences[]{noChangeBic - candidates[0].first,
162165
noChangeBic - candidates[1].first};
163-
m_CurrentEvidenceOfChange = evidences[0];
164-
if ( evidences[0] > m_MinimumDeltaBicToDetect
165-
&& evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0)
166+
double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()};
167+
168+
double x[]{evidences[0] / m_MinimumDeltaBicToDetect,
169+
2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect,
170+
evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence,
171+
static_cast<double>(m_TimeRange.range() - m_MinimumTimeToDetect)
172+
/ static_cast<double>(m_MaximumTimeToDetect - m_MinimumTimeToDetect)};
173+
double p{ CTools::logisticFunction(x[0], 0.05, 1.0)
174+
* CTools::logisticFunction(x[1], 0.1, 1.0)
175+
* (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0))
176+
* (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))};
177+
LOG_TRACE("p = " << p);
178+
179+
if (p > 0.0625/*= std::pow(0.5, 4.0)*/)
166180
{
167181
return (*candidates[0].second)->change();
168182
}
183+
184+
m_CurrentEvidenceOfChange = evidences[0];
169185
}
170186
return TOptionalChangeDescription();
171187
}
@@ -227,9 +243,34 @@ namespace time_series_change_detector_detail
227243

228244
CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel,
229245
const TPriorPtr &residualModel) :
230-
m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel}
246+
m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0},
247+
m_TrendModel{trendModel}, m_ResidualModel{residualModel}
231248
{}
232249

250+
bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
251+
core::CStateRestoreTraverser &traverser)
252+
{
253+
do
254+
{
255+
const std::string name{traverser.name()};
256+
RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood);
257+
RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood);
258+
return true;
259+
}
260+
while (traverser.next());
261+
return true;
262+
}
263+
264+
void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
265+
{
266+
inserter.insertValue(LOG_LIKELIHOOD_TAG,
267+
m_LogLikelihood,
268+
core::CIEEE754::E_SinglePrecision);
269+
inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG,
270+
m_ExpectedLogLikelihood,
271+
core::CIEEE754::E_SinglePrecision);
272+
}
273+
233274
void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
234275
{
235276
// Note if the trend and residual models are shallow copied their
@@ -249,6 +290,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const
249290
uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const
250291
{
251292
seed = CChecksum::calculate(seed, m_LogLikelihood);
293+
seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood);
252294
seed = CChecksum::calculate(seed, m_TrendModel);
253295
return CChecksum::calculate(seed, m_ResidualModel);
254296
}
@@ -271,6 +313,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood)
271313
m_LogLikelihood += logLikelihood;
272314
}
273315

316+
double CUnivariateChangeModel::expectedLogLikelihood() const
317+
{
318+
return m_ExpectedLogLikelihood;
319+
}
320+
321+
void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood)
322+
{
323+
m_ExpectedLogLikelihood += logLikelihood;
324+
}
325+
274326
const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const
275327
{
276328
return *m_TrendModel;
@@ -301,31 +353,29 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren
301353
CUnivariateChangeModel{trendModel, residualModel}
302354
{}
303355

304-
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
356+
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &params,
305357
core::CStateRestoreTraverser &traverser)
306358
{
307-
do
308-
{
309-
const std::string name{traverser.name()};
310-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
311-
double logLikelihood,
312-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
313-
this->addLogLikelihood(logLikelihood))
314-
}
315-
while (traverser.next());
316-
return true;
359+
return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser);
317360
}
318361

319362
void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
320363
{
321-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
364+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
322365
}
323366

324367
double CUnivariateNoChangeModel::bic() const
325368
{
326369
return -2.0 * this->logLikelihood();
327370
}
328371

372+
double CUnivariateNoChangeModel::expectedBic() const
373+
{
374+
// This is irrelevant since this is only used for deciding
375+
// whether to accept a change.
376+
return this->bic();
377+
}
378+
329379
TOptionalChangeDescription CUnivariateNoChangeModel::change() const
330380
{
331381
return TOptionalChangeDescription();
@@ -348,7 +398,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count,
348398
samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0));
349399
}
350400

351-
double logLikelihood;
401+
double logLikelihood{0.0};
352402
if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights,
353403
logLikelihood) == maths_t::E_FpNoErrors)
354404
{
@@ -377,13 +427,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &
377427
bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
378428
core::CStateRestoreTraverser &traverser)
379429
{
430+
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
431+
{
432+
return false;
433+
}
380434
do
381435
{
382436
const std::string name{traverser.name()};
383-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
384-
double logLikelihood,
385-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
386-
this->addLogLikelihood(logLikelihood))
387437
RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value()))
388438
RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode)
389439
RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount)
@@ -395,7 +445,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam
395445

396446
void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
397447
{
398-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
448+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
399449
inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited());
400450
inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount);
401451
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
@@ -407,6 +457,11 @@ double CUnivariateLevelShiftModel::bic() const
407457
return -2.0 * this->logLikelihood() + std::log(m_SampleCount);
408458
}
409459

460+
double CUnivariateLevelShiftModel::expectedBic() const
461+
{
462+
return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount);
463+
}
464+
410465
TOptionalChangeDescription CUnivariateLevelShiftModel::change() const
411466
{
412467
// The "magic" 0.9 is due to the fact that the trend is updated
@@ -456,12 +511,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count,
456511
residualModel.addSamples(weightStyles, samples, weights);
457512
residualModel.propagateForwardsByTime(1.0);
458513

459-
double logLikelihood;
514+
double logLikelihood{0.0};
460515
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
461516
logLikelihood) == maths_t::E_FpNoErrors)
462517
{
463518
this->addLogLikelihood(logLikelihood);
464519
}
520+
for (const auto &weight : weights)
521+
{
522+
double expectedLogLikelihood{0.0};
523+
TDouble4Vec1Vec weight_{weight};
524+
if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
525+
residualModel, weightStyles, weight_},
526+
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
527+
expectedLogLikelihood, weightStyles, weight))
528+
{
529+
this->addExpectedLogLikelihood(expectedLogLikelihood);
530+
}
531+
}
465532
}
466533
}
467534

@@ -487,13 +554,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr
487554
bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
488555
core::CStateRestoreTraverser &traverser)
489556
{
557+
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
558+
{
559+
return false;
560+
}
490561
do
491562
{
492563
const std::string name{traverser.name()};
493-
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
494-
double logLikelihood,
495-
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
496-
this->addLogLikelihood(logLikelihood))
497564
RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser))
498565
}
499566
while (traverser.next());
@@ -502,7 +569,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams
502569

503570
void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
504571
{
505-
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
572+
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
506573
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
507574
boost::cref(this->residualModel()), _1));
508575
}
@@ -512,6 +579,11 @@ double CUnivariateTimeShiftModel::bic() const
512579
return -2.0 * this->logLikelihood();
513580
}
514581

582+
double CUnivariateTimeShiftModel::expectedBic() const
583+
{
584+
return -2.0 * this->expectedLogLikelihood();
585+
}
586+
515587
TOptionalChangeDescription CUnivariateTimeShiftModel::change() const
516588
{
517589
return SChangeDescription{SChangeDescription::E_TimeShift,
@@ -540,12 +612,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count,
540612
residualModel.addSamples(weightStyles, samples, weights);
541613
residualModel.propagateForwardsByTime(1.0);
542614

543-
double logLikelihood;
615+
double logLikelihood{0.0};
544616
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
545617
logLikelihood) == maths_t::E_FpNoErrors)
546618
{
547619
this->addLogLikelihood(logLikelihood);
548620
}
621+
for (const auto &weight : weights)
622+
{
623+
double expectedLogLikelihood{0.0};
624+
TDouble4Vec1Vec weight_{weight};
625+
residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
626+
residualModel, weightStyles, weight_},
627+
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
628+
expectedLogLikelihood, weightStyles, weight);
629+
this->addExpectedLogLikelihood(expectedLogLikelihood);
630+
}
549631
}
550632
}
551633

0 commit comments

Comments
 (0)