Skip to content

Commit 13aee89

Browse files
authored
[6.x][ML] Don't lose mean offset in expanding window when persisting (elastic#99)
1 parent ab2f819 commit 13aee89

File tree

6 files changed

+118
-1
lines changed

6 files changed

+118
-1
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ model state on disk ({pull}89[#89])
2727
=== Bug Fixes
2828

2929
Age seasonal components in proportion to the fraction of values with which they're updated ({pull}88[#88])
30+
Persist and restore was missing some of the trend model state ({pull}99[#99])
3031

3132
=== Regressions
3233

lib/maths/CExpandingWindow.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ namespace {
2525
const std::string BUCKET_LENGTH_INDEX_TAG("a");
2626
const std::string BUCKET_VALUES_TAG("b");
2727
const std::string START_TIME_TAG("c");
28+
const std::string MEAN_OFFSET_TAG("d");
2829
}
2930

3031
CExpandingWindow::CExpandingWindow(core_t::TTime bucketLength,
@@ -45,6 +46,7 @@ bool CExpandingWindow::acceptRestoreTraverser(core::CStateRestoreTraverser& trav
4546
RESTORE_BUILT_IN(START_TIME_TAG, m_StartTime)
4647
RESTORE(BUCKET_VALUES_TAG,
4748
core::CPersistUtils::restore(BUCKET_VALUES_TAG, m_BucketValues, traverser));
49+
RESTORE(MEAN_OFFSET_TAG, m_MeanOffset.fromDelimited(traverser.value()))
4850
} while (traverser.next());
4951
return true;
5052
}
@@ -53,6 +55,7 @@ void CExpandingWindow::acceptPersistInserter(core::CStatePersistInserter& insert
5355
inserter.insertValue(BUCKET_LENGTH_INDEX_TAG, m_BucketLengthIndex);
5456
inserter.insertValue(START_TIME_TAG, m_StartTime);
5557
core::CPersistUtils::persist(BUCKET_VALUES_TAG, m_BucketValues, inserter);
58+
inserter.insertValue(MEAN_OFFSET_TAG, m_MeanOffset.toDelimited());
5659
}
5760

5861
core_t::TTime CExpandingWindow::startTime() const {
@@ -148,7 +151,8 @@ bool CExpandingWindow::needToCompress(core_t::TTime time) const {
148151
uint64_t CExpandingWindow::checksum(uint64_t seed) const {
149152
seed = CChecksum::calculate(seed, m_BucketLengthIndex);
150153
seed = CChecksum::calculate(seed, m_StartTime);
151-
return CChecksum::calculate(seed, m_BucketValues);
154+
seed = CChecksum::calculate(seed, m_BucketValues);
155+
return CChecksum::calculate(seed, m_MeanOffset);
152156
}
153157

154158
void CExpandingWindow::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const {
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#include "CExpandingWindowTest.h"
8+
9+
#include <core/CContainerPrinter.h>
10+
#include <core/CLogger.h>
11+
#include <core/CRapidXmlStatePersistInserter.h>
12+
#include <core/CRapidXmlStateRestoreTraverser.h>
13+
#include <core/Constants.h>
14+
15+
#include <maths/CBasicStatistics.h>
16+
#include <maths/CExpandingWindow.h>
17+
18+
#include <test/CRandomNumbers.h>
19+
20+
#include <boost/math/constants/constants.hpp>
21+
22+
#include <vector>
23+
24+
using namespace ml;
25+
26+
// File-local type aliases and shared test data for the tests in this file.
namespace {
27+
using TDoubleVec = std::vector<double>;
28+
using TTimeVec = std::vector<core_t::TTime>;
29+
using TTimeCRng = core::CVectorRange<const TTimeVec>;
30+
// NOTE(review): the two accumulator aliases below are not referenced by the
// test code visible in this file; confirm whether they are still needed.
using TFloatMeanAccumulator =
31+
maths::CBasicStatistics::SSampleMean<maths::CFloatStorage>::TAccumulator;
32+
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
33+
34+
// Bucket lengths handed (as a TTimeCRng) to every CExpandingWindow constructed
// in the tests. Units are presumably seconds - TODO confirm.
TTimeVec BUCKET_LENGTHS{300, 600, 1800, 3600};
35+
}
36+
37+
// Round-trip test: fill a window with values, persist it to XML, restore that
// XML into a freshly constructed window and require the two checksums to match.
void CExpandingWindowTest::testPersistence() {
38+
// Test that persisting and then restoring a window reproduces its state.
39+
40+
core_t::TTime bucketLength{300};
41+
std::size_t size{336};
42+
double decayRate{0.01};
43+
44+
test::CRandomNumbers rng;
45+
46+
maths::CExpandingWindow origWindow{bucketLength, TTimeCRng{BUCKET_LENGTHS, 0, 4},
47+
size, decayRate};
48+
49+
TDoubleVec values;
50+
// Draw `size` samples via generateUniformSamples(0.0, 10.0, ...).
rng.generateUniformSamples(0.0, 10.0, size, values);
51+
// Add exactly one value per bucket: values[i] is added at time i * bucketLength.
for (core_t::TTime time = 0; time < static_cast<core_t::TTime>(size) * bucketLength;
52+
time += bucketLength) {
53+
double value{values[time / bucketLength]};
54+
origWindow.add(time, value);
55+
}
56+
57+
// Persist the populated window into an XML string.
std::string origXml;
58+
{
59+
core::CRapidXmlStatePersistInserter inserter("root");
60+
origWindow.acceptPersistInserter(inserter);
61+
inserter.toXml(origXml);
62+
}
63+
LOG_TRACE(<< "Window XML = " << origXml);
64+
LOG_DEBUG(<< "Window XML size = " << origXml.size());
65+
66+
// Restore the XML into a new window built with the same constructor arguments.
67+
{
68+
core::CRapidXmlParser parser;
69+
CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml));
70+
core::CRapidXmlStateRestoreTraverser traverser(parser);
71+
maths::CExpandingWindow restoredWindow{
72+
bucketLength, TTimeCRng{BUCKET_LENGTHS, 0, 4}, size, decayRate};
73+
CPPUNIT_ASSERT_EQUAL(
74+
true, traverser.traverseSubLevel(boost::bind(&maths::CExpandingWindow::acceptRestoreTraverser,
75+
&restoredWindow, _1)));
76+
77+
LOG_DEBUG(<< "orig checksum = " << origWindow.checksum()
78+
<< ", new checksum = " << restoredWindow.checksum());
79+
// The comparison is by checksum only, so this detects lost state only for
// members that checksum() actually folds in.
CPPUNIT_ASSERT_EQUAL(origWindow.checksum(), restoredWindow.checksum());
80+
}
81+
}
82+
83+
// Builds and returns the CppUnit suite for this fixture; it currently
// registers the single test case testPersistence.
// NOTE(review): the suite and test caller are allocated with raw `new` and
// never deleted here - presumably ownership passes to the suite/runner per
// CppUnit convention; confirm.
CppUnit::Test* CExpandingWindowTest::suite() {
84+
CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CExpandingWindowTest");
85+
86+
suiteOfTests->addTest(new CppUnit::TestCaller<CExpandingWindowTest>(
87+
"CExpandingWindowTest::testPersistence", &CExpandingWindowTest::testPersistence));
88+
89+
return suiteOfTests;
90+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#ifndef INCLUDED_CExpandingWindowTest_h
8+
#define INCLUDED_CExpandingWindowTest_h
9+
10+
#include <cppunit/extensions/HelperMacros.h>
11+
12+
// CppUnit test fixture declaring the CExpandingWindow unit tests.
class CExpandingWindowTest : public CppUnit::TestFixture {
13+
public:
14+
// Test case covering persist and restore.
void testPersistence();
15+
16+
// Returns the CppUnit test object for this fixture.
static CppUnit::Test* suite();
17+
};
18+
19+
#endif // INCLUDED_CExpandingWindowTest_h

lib/maths/unittest/Main.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "CDecayRateControllerTest.h"
2222
#include "CEntropySketchTest.h"
2323
#include "CEqualWithToleranceTest.h"
24+
#include "CExpandingWindowTest.h"
2425
#include "CForecastTest.h"
2526
#include "CGammaRateConjugateTest.h"
2627
#include "CGramSchmidtTest.h"
@@ -84,6 +85,7 @@
8485
int main(int argc, const char** argv) {
8586
ml::test::CTestRunner runner(argc, argv);
8687

88+
runner.addTest(CExpandingWindowTest::suite());
8789
runner.addTest(CAgglomerativeClustererTest::suite());
8890
runner.addTest(CAssignmentTest::suite());
8991
runner.addTest(CBasicStatisticsTest::suite());

lib/maths/unittest/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ SRCS=\
3232
CDecayRateControllerTest.cc \
3333
CEntropySketchTest.cc \
3434
CEqualWithToleranceTest.cc \
35+
CExpandingWindowTest.cc \
3536
CForecastTest.cc \
3637
CGammaRateConjugateTest.cc \
3738
CGramSchmidtTest.cc \

0 commit comments

Comments
 (0)