Skip to content

Commit ff2fb61

Browse files
committed
Guard against the case there are insufficient populated values to estimate variance in tests for periodic components (#13)
This was triggering a boost exception to be thrown and spamming the logs. In all such cases, we have too little data to correctly determine if such a periodic component is present and so can correctly exit early.
1 parent eb300d4 commit ff2fb61

File tree

1 file changed

+83
-77
lines changed

1 file changed

+83
-77
lines changed

lib/maths/CPeriodicityHypothesisTests.cc

Lines changed: 83 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@ bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorC
12921292
return false;
12931293
}
12941294

1295-
LOG_TRACE("populated = " << 100.0 * populated << "%");
1295+
LOG_TRACE("populated = " << 100.0 * populated / static_cast<double>(buckets.size()) << "%");
12961296

12971297
stats.s_Range = range.max() - range.min();
12981298
stats.s_B = populated;
@@ -1466,65 +1466,68 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
14661466
LOG_TRACE(" populated = " << b);
14671467

14681468
double df1{B - b};
1469-
double v1{varianceAtPercentile(residualVariance<double>(trend, scale), df1,
1470-
50.0 + CONFIDENCE_INTERVAL / 2.0)};
1471-
LOG_TRACE(" variance = " << v1);
1472-
LOG_TRACE(" varianceThreshold = " << vt);
1473-
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1474-
1475-
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1476-
if (v1 < vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1477-
{
1478-
double R{CSignal::autocorrelation(period, values)};
1479-
R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0);
1480-
LOG_TRACE(" autocorrelation = " << R);
1481-
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1482-
if (R > Rt)
1469+
if (df1 > 0.0)
1470+
{
1471+
double v1{varianceAtPercentile(residualVariance<double>(trend, scale), df1,
1472+
50.0 + CONFIDENCE_INTERVAL / 2.0)};
1473+
LOG_TRACE(" variance = " << v1);
1474+
LOG_TRACE(" varianceThreshold = " << vt);
1475+
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1476+
1477+
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1478+
if (v1 < vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
14831479
{
1484-
return true;
1480+
double R{CSignal::autocorrelation(period, values)};
1481+
R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0);
1482+
LOG_TRACE(" autocorrelation = " << R);
1483+
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1484+
if (R > Rt)
1485+
{
1486+
return true;
1487+
}
14851488
}
1486-
}
14871489

1488-
// The amplitude test.
1490+
// The amplitude test.
14891491

1490-
double F1{1.0};
1491-
if (v1 > 0.0)
1492-
{
1493-
try
1492+
double F1{1.0};
1493+
if (v1 > 0.0)
14941494
{
1495-
std::size_t n{static_cast<std::size_t>(
1496-
std::ceil(Rt * static_cast<double>(length(window) / period_)))};
1497-
TMeanAccumulator level;
1498-
for (const auto &value : values)
1495+
try
14991496
{
1500-
if (CBasicStatistics::count(value) > 0.0)
1497+
std::size_t n{static_cast<std::size_t>(
1498+
std::ceil(Rt * static_cast<double>(length(window) / period_)))};
1499+
TMeanAccumulator level;
1500+
for (const auto &value : values)
15011501
{
1502-
level.add(CBasicStatistics::mean(value));
1502+
if (CBasicStatistics::count(value) > 0.0)
1503+
{
1504+
level.add(CBasicStatistics::mean(value));
1505+
}
15031506
}
1504-
}
1505-
TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)});
1506-
periodicTrend(values, window, m_BucketLength, amplitudes);
1507-
boost::math::normal normal(0.0, std::sqrt(v1));
1508-
std::for_each(amplitudes.begin(), amplitudes.end(),
1509-
[&F1, &normal, at](CMinAmplitude &x)
1510-
{
1511-
if (x.amplitude() >= at)
1507+
TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)});
1508+
periodicTrend(values, window, m_BucketLength, amplitudes);
1509+
boost::math::normal normal(0.0, std::sqrt(v1));
1510+
std::for_each(amplitudes.begin(), amplitudes.end(),
1511+
[&F1, &normal, at](CMinAmplitude &x)
15121512
{
1513-
F1 = std::min(F1, x.significance(normal));
1514-
}
1515-
});
1513+
if (x.amplitude() >= at)
1514+
{
1515+
F1 = std::min(F1, x.significance(normal));
1516+
}
1517+
});
1518+
}
1519+
catch (const std::exception &e)
1520+
{
1521+
LOG_ERROR("Unable to compute significance of amplitude: " << e.what());
1522+
}
15161523
}
1517-
catch (const std::exception &e)
1524+
LOG_TRACE(" F(amplitude) = " << F1);
1525+
1526+
if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE)
15181527
{
1519-
LOG_ERROR("Unable to compute significance of amplitude: " << e.what());
1528+
return true;
15201529
}
15211530
}
1522-
LOG_TRACE(" F(amplitude) = " << F1);
1523-
1524-
if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE)
1525-
{
1526-
return true;
1527-
}
15281531
return false;
15291532
}
15301533

@@ -1618,7 +1621,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
16181621
{
16191622
for (std::size_t i = 0u; i < 2; ++i)
16201623
{
1621-
for (auto &&delta : deltas[i])
1624+
for (auto &delta : deltas[i])
16221625
{
16231626
delta = (delta + m_BucketLength) % windowLength;
16241627
}
@@ -1683,39 +1686,42 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
16831686
}
16841687

16851688
double df1{B - b};
1686-
double variance{correction * minimum[0].first};
1687-
double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)};
1688-
LOG_TRACE(" variance = " << v1);
1689-
LOG_TRACE(" varianceThreshold = " << vt);
1690-
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1691-
1692-
if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1689+
if (df1 > 0.0)
16931690
{
1694-
double R{-1.0};
1695-
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1691+
double variance{correction * minimum[0].first};
1692+
double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)};
1693+
LOG_TRACE(" variance = " << v1);
1694+
LOG_TRACE(" varianceThreshold = " << vt);
1695+
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
16961696

1697-
startOfPartition = best[0].second;
1698-
windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]);
1699-
windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]);
1700-
for (const auto &windows_ : windows)
1697+
if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
17011698
{
1702-
TFloatMeanAccumulatorVec partitionValues;
1703-
project(values, windows_, m_BucketLength, partitionValues);
1704-
std::size_t windowLength_(length(windows_[0]) / m_BucketLength);
1705-
double BW{std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0,
1706-
[](double n, const TFloatMeanAccumulator &value)
1707-
{ return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0); })};
1708-
R = std::max(R, autocorrelationAtPercentile(CSignal::autocorrelation(
1709-
windowLength_ + period, partitionValues),
1710-
BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
1711-
LOG_TRACE(" autocorrelation = " << R);
1712-
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1713-
}
1699+
double R{-1.0};
1700+
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
17141701

1715-
if (R > Rt)
1716-
{
1717-
stats.s_StartOfPartition = startOfPartition;
1718-
return true;
1702+
startOfPartition = best[0].second;
1703+
windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]);
1704+
windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]);
1705+
for (const auto &windows_ : windows)
1706+
{
1707+
TFloatMeanAccumulatorVec partitionValues;
1708+
project(values, windows_, m_BucketLength, partitionValues);
1709+
std::size_t windowLength_(length(windows_[0]) / m_BucketLength);
1710+
double BW{std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0,
1711+
[](double n, const TFloatMeanAccumulator &value)
1712+
{ return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0); })};
1713+
R = std::max(R, autocorrelationAtPercentile(CSignal::autocorrelation(
1714+
windowLength_ + period, partitionValues),
1715+
BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
1716+
LOG_TRACE(" autocorrelation = " << R);
1717+
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1718+
}
1719+
1720+
if (R > Rt)
1721+
{
1722+
stats.s_StartOfPartition = startOfPartition;
1723+
return true;
1724+
}
17191725
}
17201726
}
17211727
return false;

0 commit comments

Comments
 (0)