@@ -1030,18 +1030,18 @@ CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const
1030
1030
{
1031
1031
STestStats stats;
1032
1032
CPeriodicityHypothesisTestsResult resultForHypothesis{hypothesis.test (stats)};
1033
- summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1034
- std::move (resultForHypothesis));
1033
+ if (stats.s_B > stats.s_DF0 )
1034
+ {
1035
+ summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1036
+ std::move (resultForHypothesis));
1037
+ }
1035
1038
}
1036
1039
1037
1040
TMinAccumulator vCutoff;
1038
1041
for (const auto &summary : summaries)
1039
1042
{
1040
- if (summary.s_DF > 0.0 )
1041
- {
1042
- vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1043
- 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
1044
- }
1043
+ vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1044
+ 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
1045
1045
}
1046
1046
if (vCutoff.count () > 0 )
1047
1047
{
@@ -1275,6 +1275,26 @@ bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period,
1275
1275
>= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast <double >(period);
1276
1276
}
1277
1277
1278
+ bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest (const TFloatMeanAccumulatorCRng &buckets,
1279
+ std::size_t period) const
1280
+ {
1281
+ double repeats{0.0 };
1282
+ for (std::size_t i = 0u ; i < period; ++i)
1283
+ {
1284
+ for (std::size_t j = i + period; j < buckets.size (); j += period)
1285
+ {
1286
+ if ( CBasicStatistics::count (buckets[j])
1287
+ * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1288
+ {
1289
+ repeats += 1.0 ;
1290
+ break ;
1291
+ }
1292
+ }
1293
+ }
1294
+ LOG_TRACE (" repeated values = " << repeats);
1295
+ return repeats >= static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 ;
1296
+ }
1297
+
1278
1298
bool CPeriodicityHypothesisTests::testStatisticsFor (const TFloatMeanAccumulatorCRng &buckets,
1279
1299
STestStats &stats) const
1280
1300
{
@@ -1424,21 +1444,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
1424
1444
1425
1445
// We need to observe a minimum number of repeated values to test with
1426
1446
// an acceptable false positive rate.
1427
- double repeats{0.0 };
1428
- for (std::size_t i = 0u ; i < period; ++i)
1429
- {
1430
- for (std::size_t j = i + period; j < buckets.size (); j += period)
1431
- {
1432
- if ( CBasicStatistics::count (buckets[j])
1433
- * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1434
- {
1435
- repeats += 1.0 ;
1436
- break ;
1437
- }
1438
- }
1439
- }
1440
- LOG_TRACE (" repeated values = " << repeats);
1441
- if (repeats < static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 )
1447
+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
1442
1448
{
1443
1449
return false ;
1444
1450
}
@@ -1484,7 +1490,8 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
1484
1490
LOG_TRACE (" significance = " << CStatisticalTests::leftTailFTest (v1 / v0, df1, df0));
1485
1491
1486
1492
double Rt{stats.s_Rt * CTools::truncate (1.0 - 0.5 * (vt - v1) / vt, 0.9 , 1.0 )};
1487
- if (v1 < vt && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1493
+ if ( v1 < vt && B > 1.0
1494
+ && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1488
1495
{
1489
1496
double R{CSignal::autocorrelation (period, values)};
1490
1497
R = autocorrelationAtPercentile (R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0 );
@@ -1558,6 +1565,15 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1558
1565
{
1559
1566
return false ;
1560
1567
}
1568
+
1569
+ std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
1570
+
1571
+ // We need to observe a minimum number of repeated values to test with
1572
+ // an acceptable false positive rate.
1573
+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
1574
+ {
1575
+ return false ;
1576
+ }
1561
1577
if (stats.s_HasPartition )
1562
1578
{
1563
1579
return true ;
@@ -1568,7 +1584,6 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1568
1584
// evidence that it reduces the residual variance and repeats.
1569
1585
1570
1586
core_t ::TTime windowLength{length (buckets, m_BucketLength)};
1571
- std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
1572
1587
core_t ::TTime repeat{length (partition)};
1573
1588
core_t ::TTime startOfPartition{stats.s_StartOfPartition };
1574
1589
double B{stats.s_B };
@@ -1723,11 +1738,13 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
1723
1738
double BW{std::accumulate (partitionValues.begin (), partitionValues.end (), 0.0 ,
1724
1739
[](double n, const TFloatMeanAccumulator &value)
1725
1740
{ return n + (CBasicStatistics::count (value) > 0.0 ? 1.0 : 0.0 ); })};
1726
- R = std::max (R, autocorrelationAtPercentile (CSignal::autocorrelation (
1727
- windowLength_ + period, partitionValues),
1728
- BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1729
- LOG_TRACE (" autocorrelation = " << R);
1730
- LOG_TRACE (" autocorrelationThreshold = " << Rt);
1741
+ if (BW > 1.0 )
1742
+ {
1743
+ double RW{CSignal::autocorrelation (windowLength_ + period, partitionValues)};
1744
+ R = std::max (R, autocorrelationAtPercentile (RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1745
+ LOG_TRACE (" autocorrelation = " << R);
1746
+ LOG_TRACE (" autocorrelationThreshold = " << Rt);
1747
+ }
1731
1748
}
1732
1749
1733
1750
if (R > Rt)
0 commit comments