Skip to content

Commit 65f6e5b

Browse files
committed
Guard against the case there are insufficient populated values to estimate variance in tests for periodic components (#13)
This was triggering a boost exception to be thrown and spamming the logs. In all such cases, we have too little data to correctly determine if such a periodic component is present and so can correctly exit early.
1 parent 3f06de9 commit 65f6e5b

File tree

1 file changed

+83
-77
lines changed

1 file changed

+83
-77
lines changed

lib/maths/CPeriodicityHypothesisTests.cc

Lines changed: 83 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,7 +1301,7 @@ bool CPeriodicityHypothesisTests::testStatisticsFor(const TFloatMeanAccumulatorC
13011301
return false;
13021302
}
13031303

1304-
LOG_TRACE("populated = " << 100.0 * populated << "%");
1304+
LOG_TRACE("populated = " << 100.0 * populated / static_cast<double>(buckets.size()) << "%");
13051305

13061306
stats.s_Range = range.max() - range.min();
13071307
stats.s_B = populated;
@@ -1475,65 +1475,68 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
14751475
LOG_TRACE(" populated = " << b);
14761476

14771477
double df1{B - b};
1478-
double v1{varianceAtPercentile(residualVariance<double>(trend, scale), df1,
1479-
50.0 + CONFIDENCE_INTERVAL / 2.0)};
1480-
LOG_TRACE(" variance = " << v1);
1481-
LOG_TRACE(" varianceThreshold = " << vt);
1482-
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1483-
1484-
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1485-
if (v1 < vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1486-
{
1487-
double R{CSignal::autocorrelation(period, values)};
1488-
R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0);
1489-
LOG_TRACE(" autocorrelation = " << R);
1490-
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1491-
if (R > Rt)
1478+
if (df1 > 0.0)
1479+
{
1480+
double v1{varianceAtPercentile(residualVariance<double>(trend, scale), df1,
1481+
50.0 + CONFIDENCE_INTERVAL / 2.0)};
1482+
LOG_TRACE(" variance = " << v1);
1483+
LOG_TRACE(" varianceThreshold = " << vt);
1484+
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1485+
1486+
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1487+
if (v1 < vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
14921488
{
1493-
return true;
1489+
double R{CSignal::autocorrelation(period, values)};
1490+
R = autocorrelationAtPercentile(R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0);
1491+
LOG_TRACE(" autocorrelation = " << R);
1492+
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1493+
if (R > Rt)
1494+
{
1495+
return true;
1496+
}
14941497
}
1495-
}
14961498

1497-
// The amplitude test.
1499+
// The amplitude test.
14981500

1499-
double F1{1.0};
1500-
if (v1 > 0.0)
1501-
{
1502-
try
1501+
double F1{1.0};
1502+
if (v1 > 0.0)
15031503
{
1504-
std::size_t n{static_cast<std::size_t>(
1505-
std::ceil(Rt * static_cast<double>(length(window) / period_)))};
1506-
TMeanAccumulator level;
1507-
for (const auto &value : values)
1504+
try
15081505
{
1509-
if (CBasicStatistics::count(value) > 0.0)
1506+
std::size_t n{static_cast<std::size_t>(
1507+
std::ceil(Rt * static_cast<double>(length(window) / period_)))};
1508+
TMeanAccumulator level;
1509+
for (const auto &value : values)
15101510
{
1511-
level.add(CBasicStatistics::mean(value));
1511+
if (CBasicStatistics::count(value) > 0.0)
1512+
{
1513+
level.add(CBasicStatistics::mean(value));
1514+
}
15121515
}
1513-
}
1514-
TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)});
1515-
periodicTrend(values, window, m_BucketLength, amplitudes);
1516-
boost::math::normal normal(0.0, std::sqrt(v1));
1517-
std::for_each(amplitudes.begin(), amplitudes.end(),
1518-
[&F1, &normal, at](CMinAmplitude &x)
1519-
{
1520-
if (x.amplitude() >= at)
1516+
TMinAmplitudeVec amplitudes(period, {n, CBasicStatistics::mean(level)});
1517+
periodicTrend(values, window, m_BucketLength, amplitudes);
1518+
boost::math::normal normal(0.0, std::sqrt(v1));
1519+
std::for_each(amplitudes.begin(), amplitudes.end(),
1520+
[&F1, &normal, at](CMinAmplitude &x)
15211521
{
1522-
F1 = std::min(F1, x.significance(normal));
1523-
}
1524-
});
1522+
if (x.amplitude() >= at)
1523+
{
1524+
F1 = std::min(F1, x.significance(normal));
1525+
}
1526+
});
1527+
}
1528+
catch (const std::exception &e)
1529+
{
1530+
LOG_ERROR("Unable to compute significance of amplitude: " << e.what());
1531+
}
15251532
}
1526-
catch (const std::exception &e)
1533+
LOG_TRACE(" F(amplitude) = " << F1);
1534+
1535+
if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE)
15271536
{
1528-
LOG_ERROR("Unable to compute significance of amplitude: " << e.what());
1537+
return true;
15291538
}
15301539
}
1531-
LOG_TRACE(" F(amplitude) = " << F1);
1532-
1533-
if (1.0 - std::pow(1.0 - F1, b) <= MAXIMUM_SIGNIFICANCE)
1534-
{
1535-
return true;
1536-
}
15371540
return false;
15381541
}
15391542

@@ -1627,7 +1630,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
16271630
{
16281631
for (std::size_t i = 0u; i < 2; ++i)
16291632
{
1630-
for (auto &&delta : deltas[i])
1633+
for (auto &delta : deltas[i])
16311634
{
16321635
delta = (delta + m_BucketLength) % windowLength;
16331636
}
@@ -1692,39 +1695,42 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
16921695
}
16931696

16941697
double df1{B - b};
1695-
double variance{correction * minimum[0].first};
1696-
double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)};
1697-
LOG_TRACE(" variance = " << v1);
1698-
LOG_TRACE(" varianceThreshold = " << vt);
1699-
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
1700-
1701-
if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1698+
if (df1 > 0.0)
17021699
{
1703-
double R{-1.0};
1704-
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
1700+
double variance{correction * minimum[0].first};
1701+
double v1{varianceAtPercentile(variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0)};
1702+
LOG_TRACE(" variance = " << v1);
1703+
LOG_TRACE(" varianceThreshold = " << vt);
1704+
LOG_TRACE(" significance = " << CStatisticalTests::leftTailFTest(v1 / v0, df1, df0));
17051705

1706-
startOfPartition = best[0].second;
1707-
windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]);
1708-
windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]);
1709-
for (const auto &windows_ : windows)
1706+
if (v1 <= vt && CStatisticalTests::leftTailFTest(v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
17101707
{
1711-
TFloatMeanAccumulatorVec partitionValues;
1712-
project(values, windows_, m_BucketLength, partitionValues);
1713-
std::size_t windowLength_(length(windows_[0]) / m_BucketLength);
1714-
double BW{std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0,
1715-
[](double n, const TFloatMeanAccumulator &value)
1716-
{ return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0); })};
1717-
R = std::max(R, autocorrelationAtPercentile(CSignal::autocorrelation(
1718-
windowLength_ + period, partitionValues),
1719-
BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
1720-
LOG_TRACE(" autocorrelation = " << R);
1721-
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1722-
}
1708+
double R{-1.0};
1709+
double Rt{stats.s_Rt * CTools::truncate(1.0 - 0.5 * (vt - v1) / vt, 0.9, 1.0)};
17231710

1724-
if (R > Rt)
1725-
{
1726-
stats.s_StartOfPartition = startOfPartition;
1727-
return true;
1711+
startOfPartition = best[0].second;
1712+
windows[0] = calculateWindows(startOfPartition, windowLength, repeat, partition[0]);
1713+
windows[1] = calculateWindows(startOfPartition, windowLength, repeat, partition[1]);
1714+
for (const auto &windows_ : windows)
1715+
{
1716+
TFloatMeanAccumulatorVec partitionValues;
1717+
project(values, windows_, m_BucketLength, partitionValues);
1718+
std::size_t windowLength_(length(windows_[0]) / m_BucketLength);
1719+
double BW{std::accumulate(partitionValues.begin(), partitionValues.end(), 0.0,
1720+
[](double n, const TFloatMeanAccumulator &value)
1721+
{ return n + (CBasicStatistics::count(value) > 0.0 ? 1.0 : 0.0); })};
1722+
R = std::max(R, autocorrelationAtPercentile(CSignal::autocorrelation(
1723+
windowLength_ + period, partitionValues),
1724+
BW, 50.0 - CONFIDENCE_INTERVAL / 2.0));
1725+
LOG_TRACE(" autocorrelation = " << R);
1726+
LOG_TRACE(" autocorrelationThreshold = " << Rt);
1727+
}
1728+
1729+
if (R > Rt)
1730+
{
1731+
stats.s_StartOfPartition = startOfPartition;
1732+
return true;
1733+
}
17281734
}
17291735
}
17301736
return false;

0 commit comments

Comments
 (0)