@@ -1039,18 +1039,18 @@ CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const
1039
1039
{
1040
1040
STestStats stats;
1041
1041
CPeriodicityHypothesisTestsResult resultForHypothesis{hypothesis.test (stats)};
1042
- summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1043
- std::move (resultForHypothesis));
1042
+ if (stats.s_B > stats.s_DF0 )
1043
+ {
1044
+ summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1045
+ std::move (resultForHypothesis));
1046
+ }
1044
1047
}
1045
1048
1046
1049
TMinAccumulator vCutoff;
1047
1050
for (const auto &summary : summaries)
1048
1051
{
1049
- if (summary.s_DF > 0.0 )
1050
- {
1051
- vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1052
- 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
1053
- }
1052
+ vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1053
+ 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
1054
1054
}
1055
1055
if (vCutoff.count () > 0 )
1056
1056
{
@@ -1284,6 +1284,26 @@ bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period,
1284
1284
>= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast <double >(period);
1285
1285
}
1286
1286
1287
+ bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest (const TFloatMeanAccumulatorCRng &buckets,
1288
+ std::size_t period) const
1289
+ {
1290
+ double repeats{0.0 };
1291
+ for (std::size_t i = 0u ; i < period; ++i)
1292
+ {
1293
+ for (std::size_t j = i + period; j < buckets.size (); j += period)
1294
+ {
1295
+ if ( CBasicStatistics::count (buckets[j])
1296
+ * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1297
+ {
1298
+ repeats += 1.0 ;
1299
+ break ;
1300
+ }
1301
+ }
1302
+ }
1303
+ LOG_TRACE (" repeated values = " << repeats);
1304
+ return repeats >= static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 ;
1305
+ }
1306
+
1287
1307
bool CPeriodicityHypothesisTests::testStatisticsFor (const TFloatMeanAccumulatorCRng &buckets,
1288
1308
STestStats &stats) const
1289
1309
{
@@ -1433,21 +1453,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
1433
1453
1434
1454
// We need to observe a minimum number of repeated values to test with
1435
1455
// an acceptable false positive rate.
1436
- double repeats{0.0 };
1437
- for (std::size_t i = 0u ; i < period; ++i)
1438
- {
1439
- for (std::size_t j = i + period; j < buckets.size (); j += period)
1440
- {
1441
- if ( CBasicStatistics::count (buckets[j])
1442
- * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1443
- {
1444
- repeats += 1.0 ;
1445
- break ;
1446
- }
1447
- }
1448
- }
1449
- LOG_TRACE (" repeated values = " << repeats);
1450
- if (repeats < static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 )
1456
+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
1451
1457
{
1452
1458
return false ;
1453
1459
}
@@ -1493,7 +1499,8 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
1493
1499
LOG_TRACE (" significance = " << CStatisticalTests::leftTailFTest (v1 / v0, df1, df0));
1494
1500
1495
1501
double Rt{stats.s_Rt * CTools::truncate (1.0 - 0.5 * (vt - v1) / vt, 0.9 , 1.0 )};
1496
- if (v1 < vt && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1502
+ if ( v1 < vt && B > 1.0
1503
+ && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1497
1504
{
1498
1505
double R{CSignal::autocorrelation (period, values)};
1499
1506
R = autocorrelationAtPercentile (R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0 );
@@ -1567,6 +1574,15 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1567
1574
{
1568
1575
return false ;
1569
1576
}
1577
+
1578
+ std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
1579
+
1580
+ // We need to observe a minimum number of repeated values to test with
1581
+ // an acceptable false positive rate.
1582
+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
1583
+ {
1584
+ return false ;
1585
+ }
1570
1586
if (stats.s_HasPartition )
1571
1587
{
1572
1588
return true ;
@@ -1577,7 +1593,6 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1577
1593
// evidence that it reduces the residual variance and repeats.
1578
1594
1579
1595
core_t ::TTime windowLength{length (buckets, m_BucketLength)};
1580
- std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
1581
1596
core_t ::TTime repeat{length (partition)};
1582
1597
core_t ::TTime startOfPartition{stats.s_StartOfPartition };
1583
1598
double B{stats.s_B };
@@ -1732,11 +1747,13 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1732
1747
double BW{std::accumulate (partitionValues.begin (), partitionValues.end (), 0.0 ,
1733
1748
[](double n, const TFloatMeanAccumulator &value)
1734
1749
{ return n + (CBasicStatistics::count (value) > 0.0 ? 1.0 : 0.0 ); })};
1735
- R = std::max (R, autocorrelationAtPercentile (CSignal::autocorrelation (
1736
- windowLength_ + period, partitionValues),
1737
- BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1738
- LOG_TRACE (" autocorrelation = " << R);
1739
- LOG_TRACE (" autocorrelationThreshold = " << Rt);
1750
+ if (BW > 1.0 )
1751
+ {
1752
+ double RW{CSignal::autocorrelation (windowLength_ + period, partitionValues)};
1753
+ R = std::max (R, autocorrelationAtPercentile (RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1754
+ LOG_TRACE (" autocorrelation = " << R);
1755
+ LOG_TRACE (" autocorrelationThreshold = " << Rt);
1756
+ }
1740
1757
}
1741
1758
1742
1759
if (R > Rt)
0 commit comments