@@ -331,7 +331,8 @@ TSizeVec calculateRepeats(const TTimeTimePr2Vec& windows_,
331
331
// !
332
332
// ! These are defined as some fraction of the values which are most
333
333
// ! different from the periodic trend on the time windows \p windows_.
334
- void reweightOutliers (const TMeanVarAccumulatorVec& trend,
334
+ template <typename T>
335
+ void reweightOutliers (const std::vector<T>& trend,
335
336
const TTimeTimePr2Vec& windows_,
336
337
core_t ::TTime bucketLength,
337
338
TFloatMeanAccumulatorVec& values) {
@@ -361,10 +362,10 @@ void reweightOutliers(const TMeanVarAccumulatorVec& trend,
361
362
std::size_t b{window.second };
362
363
for (std::size_t j = a; j < b; ++j) {
363
364
const TFloatMeanAccumulator& value{values[j % n]};
364
- std::size_t offset{(j - a) % period};
365
- double difference{std::fabs (CBasicStatistics::mean (value) -
366
- CBasicStatistics::mean (trend[offset]))};
367
365
if (CBasicStatistics::count (value) > 0.0 ) {
366
+ std::size_t offset{(j - a) % period};
367
+ double difference{std::fabs (CBasicStatistics::mean (value) -
368
+ CBasicStatistics::mean (trend[offset]))};
368
369
outliers.add ({difference, j});
369
370
meanDifference.add (difference);
370
371
}
@@ -390,11 +391,11 @@ void reweightOutliers(const TMeanVarAccumulatorVec& trend,
390
391
}
391
392
392
393
// ! Compute the periodic trend from \p values falling in \p windows.
393
- template <typename U , typename V >
394
- void periodicTrend (const U & values,
394
+ template <typename T , typename CONTAINER >
395
+ void periodicTrend (const std::vector<T> & values,
395
396
const TSizeSizePr2Vec& windows_,
396
397
core_t ::TTime bucketLength,
397
- V & trend) {
398
+ CONTAINER & trend) {
398
399
if (!trend.empty ()) {
399
400
TSizeSizePr2Vec windows;
400
401
calculateIndexWindows (windows_, bucketLength, windows);
@@ -413,11 +414,11 @@ void periodicTrend(const U& values,
413
414
}
414
415
415
416
// ! Compute the periodic trend on \p values minus outliers.
416
- template <typename U , typename V >
417
- void periodicTrendMinusOutliers (U & values,
417
+ template <typename T , typename CONTAINER >
418
+ void periodicTrendMinusOutliers (std::vector<T> & values,
418
419
const TSizeSizePr2Vec& windows,
419
420
core_t ::TTime bucketLength,
420
- V & trend) {
421
+ CONTAINER & trend) {
421
422
periodicTrend (values, windows, bucketLength, trend);
422
423
reweightOutliers (trend, windows, bucketLength, values);
423
424
trend.assign (trend.size (), TMeanVarAccumulator{});
@@ -469,8 +470,8 @@ struct SResidualVarianceImpl<TMeanAccumulator> {
469
470
};
470
471
471
472
// ! Compute the residual variance of the trend \p trend.
472
- template <typename R, typename T >
473
- R residualVariance (const T & trend, double scale) {
473
+ template <typename R, typename CONTAINER >
474
+ R residualVariance (const CONTAINER & trend, double scale) {
474
475
TMeanAccumulator result;
475
476
for (const auto & bucket : trend) {
476
477
result.add (CBasicStatistics::maximumLikelihoodVariance (bucket),
@@ -1337,8 +1338,7 @@ void CPeriodicityHypothesisTests::hypothesis(const TTime2Vec& periods,
1337
1338
}
1338
1339
}
1339
1340
1340
- void CPeriodicityHypothesisTests::conditionOnHypothesis (const TTimeTimePr2Vec& windows,
1341
- const STestStats& stats,
1341
+ void CPeriodicityHypothesisTests::conditionOnHypothesis (const STestStats& stats,
1342
1342
TFloatMeanAccumulatorVec& buckets) const {
1343
1343
std::size_t n{buckets.size ()};
1344
1344
core_t ::TTime windowLength{static_cast <core_t ::TTime>(n) * m_BucketLength};
@@ -1361,14 +1361,6 @@ void CPeriodicityHypothesisTests::conditionOnHypothesis(const TTimeTimePr2Vec& w
1361
1361
}
1362
1362
}
1363
1363
}
1364
-
1365
- if (length (windows) < windowLength) {
1366
- LOG_TRACE (<< " Projecting onto " << core::CContainerPrinter::print (windows));
1367
- TFloatMeanAccumulatorVec projection;
1368
- project (buckets, windows, m_BucketLength, projection);
1369
- buckets = std::move (projection);
1370
- LOG_TRACE (<< " # values = " << buckets.size ());
1371
- }
1372
1364
}
1373
1365
1374
1366
bool CPeriodicityHypothesisTests::testPeriod (const TTimeTimePr2Vec& windows,
@@ -1401,11 +1393,22 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows,
1401
1393
return false ;
1402
1394
}
1403
1395
1404
- double B{static_cast <double >(
1405
- std::count_if (buckets.begin (), buckets.end (),
1406
- [](const TFloatMeanAccumulator& value) {
1407
- return CBasicStatistics::count (value) > 0.0 ;
1408
- }))};
1396
+ core_t ::TTime windowLength{length (windows)};
1397
+ TTimeTimePr2Vec window{{0 , windowLength}};
1398
+ TFloatMeanAccumulatorVec values (buckets.begin (), buckets.end ());
1399
+ this ->conditionOnHypothesis (stats, values);
1400
+
1401
+ if (windowLength < length (buckets, m_BucketLength)) {
1402
+ LOG_TRACE (<< " Projecting onto " << core::CContainerPrinter::print (windows));
1403
+ TFloatMeanAccumulatorVec projection;
1404
+ project (values, windows, m_BucketLength, projection);
1405
+ values = std::move (projection);
1406
+ }
1407
+
1408
+ double B{static_cast <double >(std::count_if (
1409
+ values.begin (), values.end (), [](const TFloatMeanAccumulator& value) {
1410
+ return CBasicStatistics::count (value) > 0.0 ;
1411
+ }))};
1409
1412
double df0{B - stats.s_DF0 };
1410
1413
1411
1414
// We need fewer degrees of freedom in the null hypothesis trend model
@@ -1414,9 +1417,6 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows,
1414
1417
return false ;
1415
1418
}
1416
1419
1417
- TTimeTimePr2Vec window{{0 , length (windows)}};
1418
- TFloatMeanAccumulatorVec values (buckets.begin (), buckets.end ());
1419
- this ->conditionOnHypothesis (window, stats, values);
1420
1420
TMeanVarAccumulatorVec trend (period);
1421
1421
periodicTrend (values, window, m_BucketLength, trend);
1422
1422
@@ -1438,7 +1438,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows,
1438
1438
1439
1439
double v{residualVariance<double >(trend, scale)};
1440
1440
v = varianceAtPercentile (v, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0 );
1441
- reweightOutliers (trend, windows , m_BucketLength, values);
1441
+ reweightOutliers (trend, window , m_BucketLength, values);
1442
1442
trend.assign (period, TMeanVarAccumulator{});
1443
1443
periodicTrend (values, window, m_BucketLength, trend);
1444
1444
@@ -1459,7 +1459,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows,
1459
1459
LOG_TRACE (<< " autocorrelation = " << R);
1460
1460
LOG_TRACE (<< " autocorrelationThreshold = " << Rt);
1461
1461
1462
- TSizeVec repeats{calculateRepeats (windows , period_, m_BucketLength, values)};
1462
+ TSizeVec repeats{calculateRepeats (window , period_, m_BucketLength, values)};
1463
1463
double meanRepeats{CBasicStatistics::mean (
1464
1464
std::accumulate (repeats.begin (), repeats.end (), TMeanAccumulator{},
1465
1465
[](TMeanAccumulator mean, std::size_t repeat) {
@@ -1501,7 +1501,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec& windows,
1501
1501
if (v > 0.0 ) {
1502
1502
try {
1503
1503
std::size_t n{static_cast <std::size_t >(
1504
- std::ceil (Rt * static_cast <double >(length (window) / period_)))};
1504
+ std::ceil (Rt * static_cast <double >(windowLength / period_)))};
1505
1505
double at{stats.s_At * std::sqrt (v0 / scale)};
1506
1506
LOG_TRACE (<< " n = " << n << " , at = " << at << " , v = " << v);
1507
1507
TMeanAccumulator level;
@@ -1555,7 +1555,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
1555
1555
1556
1556
using TDoubleTimePr = std::pair<double , core_t ::TTime>;
1557
1557
using TDoubleTimePrVec = std::vector<TDoubleTimePr>;
1558
- using TMinAccumulator = CBasicStatistics::COrderStatisticsStack <TDoubleTimePr, 1 > ;
1558
+ using TMinAccumulator = CBasicStatistics::SMin <TDoubleTimePr>::TAccumulator ;
1559
1559
using TMeanVarAccumulatorBuffer = boost::circular_buffer<TMeanVarAccumulator>;
1560
1560
1561
1561
LOG_TRACE (<< " Testing partition " << core::CContainerPrinter::print (partition)
@@ -1585,10 +1585,7 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
1585
1585
// w.r.t. the period is minimized and check if there is significant
1586
1586
// evidence that it reduces the residual variance and repeats.
1587
1587
1588
- double B{static_cast <double >(std::count_if (
1589
- buckets.begin (), buckets.end (), [](const TFloatMeanAccumulator& value) {
1590
- return CBasicStatistics::count (value) > 0.0 ;
1591
- }))};
1588
+ double B{stats.s_B };
1592
1589
double df0{B - stats.s_DF0 };
1593
1590
1594
1591
// We need fewer degrees of freedom in the null hypothesis trend model
@@ -1598,10 +1595,10 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
1598
1595
}
1599
1596
1600
1597
TFloatMeanAccumulatorVec values (buckets.begin (), buckets.end ());
1601
- this ->conditionOnHypothesis ({{ 0 , windowLength}}, stats, values);
1598
+ this ->conditionOnHypothesis (stats, values);
1602
1599
{
1603
1600
TTimeTimePr2Vec window{{0 , windowLength}};
1604
- TMeanVarAccumulatorVec trend (period);
1601
+ TMeanAccumulatorVec trend (period);
1605
1602
periodicTrend (values, window, m_BucketLength, trend);
1606
1603
reweightOutliers (trend, window, m_BucketLength, values);
1607
1604
}
@@ -1670,29 +1667,25 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
1670
1667
double b{0.0 };
1671
1668
TMinAccumulator best;
1672
1669
1673
- TTimeTimePr2Vec candidateWindows;
1674
1670
for (const auto & candidate : candidates) {
1675
1671
if (candidate.first <= 1.05 * minimum[0 ].first ) {
1676
- core_t ::TTime candidateStartOfPartition{candidate.second };
1677
- candidateWindows = calculateWindows (candidateStartOfPartition,
1678
- windowLength, repeat, partition[0 ]);
1672
+ startOfPartition = candidate.second ;
1679
1673
TMeanAccumulator cost;
1680
- for (const auto & window : candidateWindows) {
1674
+ for (const auto & window : calculateWindows (startOfPartition, windowLength,
1675
+ repeat, partition[0 ])) {
1681
1676
core_t ::TTime a_{window.first / m_BucketLength};
1682
1677
core_t ::TTime b_{window.second / m_BucketLength - 1 };
1683
1678
double va{CBasicStatistics::mean (values[a_ % values.size ()])};
1684
1679
double vb{CBasicStatistics::mean (values[b_ % values.size ()])};
1685
1680
cost.add (std::fabs (va) + std::fabs (vb) + std::fabs (vb - va));
1686
1681
}
1687
- if (best.add ({CBasicStatistics::mean (cost), candidateStartOfPartition })) {
1682
+ if (best.add ({CBasicStatistics::mean (cost), startOfPartition })) {
1688
1683
b = 0.0 ;
1689
- for (const auto & subset : partition) {
1690
- candidateWindows = calculateWindows (
1691
- candidateStartOfPartition, windowLength, repeat, subset);
1692
-
1684
+ for (std::size_t i = 0u ; i < partition.size (); ++i) {
1685
+ windows[i] = calculateWindows (startOfPartition, windowLength,
1686
+ repeat, partition[i]);
1693
1687
TMeanVarAccumulatorVec trend (period);
1694
- periodicTrend (values, candidateWindows, m_BucketLength, trend);
1695
-
1688
+ periodicTrend (values, windows[i], m_BucketLength, trend);
1696
1689
b += static_cast <double >(std::count_if (
1697
1690
trend.begin (), trend.end (), [](const TMeanVarAccumulator& value) {
1698
1691
return CBasicStatistics::count (value) > 0.0 ;
@@ -1710,17 +1703,22 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
1710
1703
return false ;
1711
1704
}
1712
1705
1713
- double variance{correction * minimum[0 ].first };
1714
- double v1{varianceAtPercentile (variance, df1, 50.0 + CONFIDENCE_INTERVAL / 2.0 )};
1706
+ startOfPartition = best[0 ].second ;
1707
+ double v1{varianceAtPercentile (correction * minimum[0 ].first , df1,
1708
+ 50.0 + CONFIDENCE_INTERVAL / 2.0 )};
1709
+ LOG_TRACE (<< " start of partition = " << startOfPartition);
1715
1710
LOG_TRACE (<< " variance = " << v1);
1716
1711
LOG_TRACE (<< " varianceThreshold = " << vt);
1717
1712
LOG_TRACE (<< " significance = "
1718
1713
<< CStatisticalTests::leftTailFTest (v1 / v0, df1, df0));
1719
1714
1720
- startOfPartition = best[0 ].second ;
1721
- windows[0 ] = calculateWindows (startOfPartition, windowLength, repeat, partition[0 ]);
1722
- windows[1 ] = calculateWindows (startOfPartition, windowLength, repeat, partition[1 ]);
1723
- LOG_TRACE (<< " start of partition = " << startOfPartition);
1715
+ values.assign (buckets.begin (), buckets.end ());
1716
+ TMeanAccumulatorVec trend;
1717
+ for (const auto & window : windows) {
1718
+ trend.assign (period, TMeanAccumulator{});
1719
+ periodicTrend (values, window, m_BucketLength, trend);
1720
+ reweightOutliers (trend, window, m_BucketLength, values);
1721
+ }
1724
1722
1725
1723
// In the following we're trading off:
1726
1724
// 1) The cyclic autocorrelation of each periodic component in the
0 commit comments