Skip to content

[ML] Implements an absolute goodness-of-fit test to accept a change #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 26, 2018
23 changes: 21 additions & 2 deletions include/maths/CTimeSeriesChangeDetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class MATHS_EXPORT CUnivariateTimeSeriesChangeDetector
const TPriorPtr &residualModel,
core_t::TTime minimumTimeToDetect = 6 * core::constants::HOUR,
core_t::TTime maximumTimeToDetect = core::constants::DAY,
double minimumDeltaBicToDetect = 12.0);
double minimumDeltaBicToDetect = 14.0);

//! Initialize by reading state from \p traverser.
bool acceptRestoreTraverser(const SModelRestoreParams &params,
Expand Down Expand Up @@ -191,6 +191,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
//! The BIC of applying the change.
virtual double bic() const = 0;

//! The expected BIC of applying the change.
virtual double expectedBic() const = 0;

//! Get a description of the change.
virtual TOptionalChangeDescription change() const = 0;

Expand Down Expand Up @@ -223,10 +226,14 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable

//! Get the log-likelihood.
double logLikelihood() const;

//! Update the data log-likelihood with \p logLikelihood.
void addLogLikelihood(double logLikelihood);

//! Get the expected log-likelihood.
double expectedLogLikelihood() const;
//! Update the expected data log-likelihood with \p logLikelihood.
void addExpectedLogLikelihood(double logLikelihood);

//! Get the time series trend model.
const CTimeSeriesDecompositionInterface &trendModel() const;
//! Get the time series trend model.
Expand All @@ -243,6 +250,9 @@ class MATHS_EXPORT CUnivariateChangeModel : private core::CNonCopyable
//! The likelihood of the data under this model.
double m_LogLikelihood;

//! The expected log-likelihood of the data under this model.
double m_ExpectedLogLikelihood;

//! A model decomposing the time series trend.
TDecompositionPtr m_TrendModel;

Expand All @@ -267,6 +277,9 @@ class MATHS_EXPORT CUnivariateNoChangeModel final : public CUnivariateChangeMode
//! Returns the no change BIC.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;

//! Returns a null object.
virtual TOptionalChangeDescription change() const;

Expand Down Expand Up @@ -301,6 +314,9 @@ class MATHS_EXPORT CUnivariateLevelShiftModel final : public CUnivariateChangeMo
//! The BIC of applying the level shift.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;

//! Get a description of the level shift.
virtual TOptionalChangeDescription change() const;

Expand Down Expand Up @@ -350,6 +366,9 @@ class MATHS_EXPORT CUnivariateTimeShiftModel final : public CUnivariateChangeMod
//! The BIC of applying the time shift.
virtual double bic() const;

//! The expected BIC of applying the change.
virtual double expectedBic() const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Going forward I think it would be best practice to use the 'override' keyword in cases such as this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Visual Studio 2013 doesn't support override, so bear in mind this will create a backporting headache. I agree for 7.0-only code we should use it though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed using override on one method generates warnings for every other virtual function that is not marked override (-Winconsistent-missing-override) in the compilation unit. It should probably be done in a single commit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm inclined to agree. I think this is something we should adopt, but also let's make this in a single change and target at 7.0 only as there would be no need to back port.


//! Get a description of the time shift.
virtual TOptionalChangeDescription change() const;

Expand Down
148 changes: 115 additions & 33 deletions lib/maths/CTimeSeriesChangeDetector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <maths/CBasicStatisticsPersist.h>
#include <maths/CChecksum.h>
#include <maths/CPrior.h>
#include <maths/CPriorDetail.h>
#include <maths/CPriorStateSerialiser.h>
#include <maths/CRestoreParams.h>
#include <maths/CTimeSeriesModel.h>
Expand All @@ -50,7 +51,6 @@ using TDouble1Vec = core::CSmallVector<double, 1>;
using TDouble4Vec = core::CSmallVector<double, 4>;
using TDouble4Vec1Vec = core::CSmallVector<TDouble4Vec, 1>;
using TOptionalChangeDescription = CUnivariateTimeSeriesChangeDetector::TOptionalChangeDescription;

const std::string MINIMUM_TIME_TO_DETECT{"a"};
const std::string MAXIMUM_TIME_TO_DETECT{"b"};
const std::string MINIMUM_DELTA_BIC_TO_DETECT{"c"};
Expand All @@ -61,9 +61,12 @@ const std::string MIN_TIME_TAG{"g"};
const std::string MAX_TIME_TAG{"h"};
const std::string CHANGE_MODEL_TAG{"i"};
const std::string LOG_LIKELIHOOD_TAG{"j"};
const std::string SHIFT_TAG{"k"};
const std::string TREND_MODEL_TAG{"l"};
const std::string RESIDUAL_MODEL_TAG{"m"};
const std::string EXPECTED_LOG_LIKELIHOOD_TAG{"k"};
const std::string SHIFT_TAG{"l"};
const std::string TREND_MODEL_TAG{"m"};
const std::string RESIDUAL_MODEL_TAG{"n"};
const std::size_t EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS{4u};
const double EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER{0.9};
}

SChangeDescription::SChangeDescription(EDescription description,
Expand Down Expand Up @@ -169,12 +172,25 @@ TOptionalChangeDescription CUnivariateTimeSeriesChangeDetector::change()

double evidences[]{noChangeBic - candidates[0].first,
noChangeBic - candidates[1].first};
m_CurrentEvidenceOfChange = evidences[0];
if ( evidences[0] > m_MinimumDeltaBicToDetect
&& evidences[0] > evidences[1] + m_MinimumDeltaBicToDetect / 2.0)
double expectedEvidence{noChangeBic - (*candidates[0].second)->expectedBic()};

double x[]{evidences[0] / m_MinimumDeltaBicToDetect,
2.0 * (evidences[0] - evidences[1]) / m_MinimumDeltaBicToDetect,
evidences[0] / EXPECTED_EVIDENCE_THRESHOLD_MULTIPLIER / expectedEvidence,
static_cast<double>(m_TimeRange.range() - m_MinimumTimeToDetect)
/ static_cast<double>(m_MaximumTimeToDetect - m_MinimumTimeToDetect)};
double p{ CTools::logisticFunction(x[0], 0.05, 1.0)
* CTools::logisticFunction(x[1], 0.1, 1.0)
* (x[2] < 0.0 ? 1.0 : CTools::logisticFunction(x[2], 0.2, 1.0))
* (0.5 + CTools::logisticFunction(x[3], 0.2, 0.5))};
LOG_TRACE("p = " << p);

if (p > 0.0625/*= std::pow(0.5, 4.0)*/)
{
return (*candidates[0].second)->change();
}

m_CurrentEvidenceOfChange = evidences[0];
}
return TOptionalChangeDescription();
}
Expand Down Expand Up @@ -236,9 +252,34 @@ namespace time_series_change_detector_detail

CUnivariateChangeModel::CUnivariateChangeModel(const TDecompositionPtr &trendModel,
const TPriorPtr &residualModel) :
m_LogLikelihood{0.0}, m_TrendModel{trendModel}, m_ResidualModel{residualModel}
m_LogLikelihood{0.0}, m_ExpectedLogLikelihood{0.0},
m_TrendModel{trendModel}, m_ResidualModel{residualModel}
{}

bool CUnivariateChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
core::CStateRestoreTraverser &traverser)
{
do
{
const std::string name{traverser.name()};
RESTORE_BUILT_IN(LOG_LIKELIHOOD_TAG, m_LogLikelihood);
RESTORE_BUILT_IN(EXPECTED_LOG_LIKELIHOOD_TAG, m_ExpectedLogLikelihood);
return true;
}
while (traverser.next());
return true;
}

void CUnivariateChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG,
m_LogLikelihood,
core::CIEEE754::E_SinglePrecision);
inserter.insertValue(EXPECTED_LOG_LIKELIHOOD_TAG,
m_ExpectedLogLikelihood,
core::CIEEE754::E_SinglePrecision);
}

void CUnivariateChangeModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const
{
// Note if the trend and residual models are shallow copied their
Expand All @@ -258,6 +299,7 @@ std::size_t CUnivariateChangeModel::memoryUsage() const
uint64_t CUnivariateChangeModel::checksum(uint64_t seed) const
{
seed = CChecksum::calculate(seed, m_LogLikelihood);
seed = CChecksum::calculate(seed, m_ExpectedLogLikelihood);
seed = CChecksum::calculate(seed, m_TrendModel);
return CChecksum::calculate(seed, m_ResidualModel);
}
Expand All @@ -280,6 +322,16 @@ void CUnivariateChangeModel::addLogLikelihood(double logLikelihood)
m_LogLikelihood += logLikelihood;
}

double CUnivariateChangeModel::expectedLogLikelihood() const
{
return m_ExpectedLogLikelihood;
}

void CUnivariateChangeModel::addExpectedLogLikelihood(double logLikelihood)
{
m_ExpectedLogLikelihood += logLikelihood;
}

const CTimeSeriesDecompositionInterface &CUnivariateChangeModel::trendModel() const
{
return *m_TrendModel;
Expand Down Expand Up @@ -310,31 +362,29 @@ CUnivariateNoChangeModel::CUnivariateNoChangeModel(const TDecompositionPtr &tren
CUnivariateChangeModel{trendModel, residualModel}
{}

bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &/*params*/,
bool CUnivariateNoChangeModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
}
while (traverser.next());
return true;
return this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser);
}

void CUnivariateNoChangeModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
}

double CUnivariateNoChangeModel::bic() const
{
return -2.0 * this->logLikelihood();
}

double CUnivariateNoChangeModel::expectedBic() const
{
// This is irrelevant since this is only used for deciding
// whether to accept a change.
return this->bic();
}

TOptionalChangeDescription CUnivariateNoChangeModel::change() const
{
return TOptionalChangeDescription();
Expand All @@ -357,7 +407,7 @@ void CUnivariateNoChangeModel::addSamples(std::size_t count,
samples.push_back(this->trendModel().detrend(sample.first, sample.second, 0.0));
}

double logLikelihood;
double logLikelihood{0.0};
if (this->residualModel().jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
Expand Down Expand Up @@ -386,13 +436,13 @@ CUnivariateLevelShiftModel::CUnivariateLevelShiftModel(const TDecompositionPtr &
bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
{
return false;
}
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
RESTORE(SHIFT_TAG, m_Shift.fromDelimited(traverser.value()))
RESTORE_BUILT_IN(RESIDUAL_MODEL_MODE_TAG, m_ResidualModelMode)
RESTORE_BUILT_IN(SAMPLE_COUNT_TAG, m_SampleCount)
Expand All @@ -404,7 +454,7 @@ bool CUnivariateLevelShiftModel::acceptRestoreTraverser(const SModelRestoreParam

void CUnivariateLevelShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
inserter.insertValue(SHIFT_TAG, m_Shift.toDelimited());
inserter.insertValue(SAMPLE_COUNT_TAG, m_SampleCount);
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
Expand All @@ -416,6 +466,11 @@ double CUnivariateLevelShiftModel::bic() const
return -2.0 * this->logLikelihood() + std::log(m_SampleCount);
}

double CUnivariateLevelShiftModel::expectedBic() const
{
return -2.0 * this->expectedLogLikelihood() + std::log(m_SampleCount);
}

TOptionalChangeDescription CUnivariateLevelShiftModel::change() const
{
// The "magic" 0.9 is due to the fact that the trend is updated
Expand Down Expand Up @@ -465,12 +520,24 @@ void CUnivariateLevelShiftModel::addSamples(std::size_t count,
residualModel.addSamples(weightStyles, samples, weights);
residualModel.propagateForwardsByTime(1.0);

double logLikelihood;
double logLikelihood{0.0};
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
this->addLogLikelihood(logLikelihood);
}
for (const auto &weight : weights)
{
double expectedLogLikelihood{0.0};
TDouble4Vec1Vec weight_{weight};
if (residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
residualModel, weightStyles, weight_},
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
expectedLogLikelihood, weightStyles, weight))
{
this->addExpectedLogLikelihood(expectedLogLikelihood);
}
}
}
}

Expand All @@ -496,13 +563,13 @@ CUnivariateTimeShiftModel::CUnivariateTimeShiftModel(const TDecompositionPtr &tr
bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams &params,
core::CStateRestoreTraverser &traverser)
{
if (this->CUnivariateChangeModel::acceptRestoreTraverser(params, traverser) == false)
{
return false;
}
do
{
const std::string name{traverser.name()};
RESTORE_SETUP_TEARDOWN(LOG_LIKELIHOOD_TAG,
double logLikelihood,
core::CStringUtils::stringToType(traverser.value(), logLikelihood),
this->addLogLikelihood(logLikelihood))
RESTORE(RESIDUAL_MODEL_TAG, this->restoreResidualModel(params.s_DistributionParams, traverser))
}
while (traverser.next());
Expand All @@ -511,7 +578,7 @@ bool CUnivariateTimeShiftModel::acceptRestoreTraverser(const SModelRestoreParams

void CUnivariateTimeShiftModel::acceptPersistInserter(core::CStatePersistInserter &inserter) const
{
inserter.insertValue(LOG_LIKELIHOOD_TAG, this->logLikelihood());
this->CUnivariateChangeModel::acceptPersistInserter(inserter);
inserter.insertLevel(RESIDUAL_MODEL_TAG, boost::bind<void>(CPriorStateSerialiser(),
boost::cref(this->residualModel()), _1));
}
Expand All @@ -521,6 +588,11 @@ double CUnivariateTimeShiftModel::bic() const
return -2.0 * this->logLikelihood();
}

double CUnivariateTimeShiftModel::expectedBic() const
{
return -2.0 * this->expectedLogLikelihood();
}

TOptionalChangeDescription CUnivariateTimeShiftModel::change() const
{
return SChangeDescription{SChangeDescription::E_TimeShift,
Expand Down Expand Up @@ -549,12 +621,22 @@ void CUnivariateTimeShiftModel::addSamples(std::size_t count,
residualModel.addSamples(weightStyles, samples, weights);
residualModel.propagateForwardsByTime(1.0);

double logLikelihood;
double logLikelihood{0.0};
if (residualModel.jointLogMarginalLikelihood(weightStyles, samples, weights,
logLikelihood) == maths_t::E_FpNoErrors)
{
this->addLogLikelihood(logLikelihood);
}
for (const auto &weight : weights)
{
double expectedLogLikelihood{0.0};
TDouble4Vec1Vec weight_{weight};
residualModel.expectation(maths::CPrior::CLogMarginalLikelihood{
residualModel, weightStyles, weight_},
EXPECTED_LOG_LIKELIHOOD_NUMBER_INTERVALS,
expectedLogLikelihood, weightStyles, weight);
this->addExpectedLogLikelihood(expectedLogLikelihood);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion lib/maths/CTimeSeriesModel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2844,7 +2844,7 @@ const CMultivariateTimeSeriesModel::TTimeDouble2VecPrCBuf &CMultivariateTimeSeri
return m_SlidingWindow;
}

const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trend(void) const
const CMultivariateTimeSeriesModel::TDecompositionPtr10Vec &CMultivariateTimeSeriesModel::trendModel(void) const
{
return m_TrendModel;
}
Expand Down