Skip to content

Commit d41de34

Browse files
author
Hendrik Muhs
authored
[ML] Fix influencer count and influence calculation (#150)
Fix counting of influencers per bucket for metric population analyses. Prior to this fix, the count was always set to 1. Fixes #24
1 parent 7164f46 commit d41de34

File tree

4 files changed

+73
-11
lines changed

4 files changed

+73
-11
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Age seasonal components in proportion to the fraction of values with which they'
5959
Persist and restore was missing some of the trend model state ({pull}#99[#99])
6060
Stop zero variance data generating a log error in the forecast confidence interval calculation ({pull}#107[#107])
6161
Fix corner case failing to calculate lgamma values and the corresponding log errors ({pull}#126[#126])
62+
Influence count per bucket for metric population analyses was wrong and led to wrong influencer scoring ({pull}#150[#150])
6263

6364
=== Regressions
6465

lib/model/CMetricPopulationModel.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ void CMetricPopulationModel::fill(model_t::EFeature feature,
943943
this->currentBucketInterimCorrections().emplace(
944944
CCorrectionKey(feature, pid, cid), correction);
945945
}
946-
params.s_Count = 1.0;
946+
params.s_Count = bucket->count();
947947
params.s_ComputeProbabilityParams
948948
.tag(pid) // new line
949949
.addCalculation(model_t::probabilityCalculation(feature))

lib/model/CProbabilityAndInfluenceCalculator.cc

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -144,43 +144,55 @@ double ratio(double numerator, double denominator, double zeroDividedByZero) {
144144
return numerator / denominator;
145145
}
146146

147+
// Functions to compute influence based on different criteria
147148
//! \brief Computes the value of summed statistics on the set difference.
148149
class CValueDifference {
149150
public:
150151
//! Features.
151-
void operator()(const TDouble2Vec& v,
152+
bool operator()(const TDouble2Vec& v,
152153
double n,
153154
const TDouble1Vec& vi,
154155
double ni,
155156
maths::CModelProbabilityParams& params,
156157
TDouble2Vec& difference) const {
158+
if (n < ni) {
159+
return false;
160+
}
161+
157162
for (std::size_t i = 0u; i < v.size(); ++i) {
158163
difference[i] = v[i] - vi[i];
159164
}
160165
params.addBucketEmpty({n == ni});
166+
167+
return true;
161168
}
162169

163170
//! Correlates.
164-
void operator()(const TDouble2Vec& v,
171+
bool operator()(const TDouble2Vec& v,
165172
const TDouble2Vec& n,
166173
const TDouble1Vec& vi,
167174
const TDouble1Vec& ni,
168175
maths::CModelProbabilityParams& params,
169176
TDouble2Vec& difference) const {
177+
if (n < ni) {
178+
return false;
179+
}
170180
TBool2Vec bucketEmpty(2);
171181
for (std::size_t i = 0u; i < v.size(); ++i) {
172182
bucketEmpty[i] = ((n[i] - ni[i]) == 0);
173183
difference[i] = v[i] - vi[i];
174184
}
175185
params.addBucketEmpty(bucketEmpty);
186+
187+
return true;
176188
}
177189
};
178190

179191
//! \brief Computes the value of min, max, dc, etc on the set intersection.
180192
class CValueIntersection {
181193
public:
182194
//! Features.
183-
void operator()(const TDouble2Vec& /*v*/,
195+
bool operator()(const TDouble2Vec& /*v*/,
184196
double /*n*/,
185197
const TDouble1Vec& vi,
186198
double ni,
@@ -190,34 +202,50 @@ class CValueIntersection {
190202
intersection[i] = vi[i];
191203
}
192204
params.addBucketEmpty({ni == 0});
205+
206+
return true;
193207
}
194208

195209
//! Correlates.
196-
void operator()(const TDouble2Vec& /*v*/,
210+
bool operator()(const TDouble2Vec& /*v*/,
197211
const TDouble2Vec& /*n*/,
198212
const TDouble1Vec& vi,
199213
const TDouble1Vec& ni,
200214
maths::CModelProbabilityParams& params,
201215
TDouble2Vec& intersection) const {
216+
202217
TBool2Vec bucketEmpty(2);
203218
for (std::size_t i = 0u; i < vi.size(); ++i) {
204219
bucketEmpty[i] = (ni[i] == 0);
205220
intersection[i] = vi[i];
206221
}
207222
params.addBucketEmpty(bucketEmpty);
223+
224+
return true;
208225
}
209226
};
210227

211228
//! \brief Computes the value of the mean statistic on a set difference.
212229
class CMeanDifference {
213230
public:
214231
//! Features.
215-
void operator()(const TDouble2Vec& v,
232+
//!
233+
//! \param[in] v overall mean
234+
//! \param[in] n overall count
235+
//! \param[in] vi influencer mean
236+
//! \param[in] ni influencer count
237+
//! \param[out] params model parameters to be updated
238+
//! \param[out] difference computed mean difference
239+
bool operator()(const TDouble2Vec& v,
216240
double n,
217241
const TDouble1Vec& vi,
218242
double ni,
219243
maths::CModelProbabilityParams& params,
220244
TDouble2Vec& difference) const {
245+
if (n <= ni) {
246+
return false;
247+
}
248+
221249
for (std::size_t d = 0u; d < v.size(); ++d) {
222250
difference[d] = maths::CBasicStatistics::mean(
223251
maths::CBasicStatistics::accumulator(n, v[d]) -
@@ -226,15 +254,21 @@ class CMeanDifference {
226254
maths_t::multiplyCountVarianceScale(TDouble2Vec(v.size(), n / (n - ni)),
227255
params.weights()[0]);
228256
params.addBucketEmpty({n == ni});
257+
258+
return true;
229259
}
230260

231261
//! Correlates.
232-
void operator()(const TDouble2Vec& v,
262+
bool operator()(const TDouble2Vec& v,
233263
const TDouble2Vec& n,
234264
const TDouble1Vec& vi,
235265
const TDouble1Vec& ni,
236266
maths::CModelProbabilityParams& params,
237267
TDouble2Vec& difference) const {
268+
if (n <= ni) {
269+
return false;
270+
}
271+
238272
TBool2Vec bucketEmpty(2);
239273
for (std::size_t d = 0u; d < 2; ++d) {
240274
bucketEmpty[d] = ((n[d] - ni[d]) == 0);
@@ -246,19 +280,31 @@ class CMeanDifference {
246280
TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])},
247281
params.weights()[0]);
248282
params.addBucketEmpty(bucketEmpty);
283+
284+
return true;
249285
}
250286
};
251287

252288
//! \brief Computes the value of the variance statistic on a set difference.
253289
class CVarianceDifference {
254290
public:
255291
//! Features.
256-
void operator()(const TDouble1Vec& v,
292+
//!
293+
//! \param[in] v overall variance and mean
294+
//! \param[in] n overall count
295+
//! \param[in] vi influencer variance and mean
296+
//! \param[in] ni influencer count
297+
//! \param[out] params model parameters to be updated
298+
//! \param[out] difference computed variance difference
299+
bool operator()(const TDouble1Vec& v,
257300
double n,
258301
const TDouble1Vec& vi,
259302
double ni,
260303
maths::CModelProbabilityParams& params,
261304
TDouble2Vec& difference) const {
305+
if (n < ni) {
306+
return false;
307+
}
262308
std::size_t dimension = v.size() / 2;
263309
for (std::size_t d = 0u; d < dimension; ++d) {
264310
difference[d] = maths::CBasicStatistics::maximumLikelihoodVariance(
@@ -268,15 +314,20 @@ class CVarianceDifference {
268314
maths_t::multiplyCountVarianceScale(TDouble2Vec(dimension, n / (n - ni)),
269315
params.weights()[0]);
270316
params.addBucketEmpty({n == ni});
317+
318+
return true;
271319
}
272320

273321
//! Correlates.
274-
void operator()(const TDouble2Vec& v,
322+
bool operator()(const TDouble2Vec& v,
275323
const TDouble2Vec& n,
276324
const TDouble1Vec& vi,
277325
const TDouble1Vec& ni,
278326
maths::CModelProbabilityParams& params,
279327
TDouble2Vec& difference) const {
328+
if (n < ni) {
329+
return false;
330+
}
280331
TBool2Vec bucketEmpty(2);
281332
for (std::size_t d = 0u; d < 2; ++d) {
282333
bucketEmpty[d] = ((n[d] - ni[d]) == 0);
@@ -288,6 +339,8 @@ class CVarianceDifference {
288339
maths_t::multiplyCountVarianceScale(
289340
TDouble2Vec{n[0] / (n[0] - ni[0]), n[1] / (n[1] - ni[1])},
290341
params.weights()[0]);
342+
343+
return true;
291344
}
292345
};
293346

@@ -370,8 +423,14 @@ void doComputeInfluences(model_t::EFeature feature,
370423
for (auto i = influencerValues.begin(); i != influencerValues.end(); ++i) {
371424
params.weights(weights).updateAnomalyModel(false);
372425

373-
computeInfluencedValue(value, count, i->second.first, i->second.second,
374-
params, influencedValue[0]);
426+
if (computeInfluencedValue(value, count, i->second.first, i->second.second,
427+
params, influencedValue[0]) == false) {
428+
LOG_ERROR(<< "Failed to compute influencer value (value = " << value
429+
<< " , count = " << count
430+
<< " , influencer value = " << i->second.first
431+
<< " , influencer count = " << i->second.second << ")");
432+
continue;
433+
}
375434

376435
double pi;
377436
bool conditional;

lib/model/unittest/CMetricPopulationModelTest.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,8 @@ CppUnit::Test* CMetricPopulationModelTest::suite() {
15481548
suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>(
15491549
"CMetricPopulationModelTest::testMinMaxAndMean",
15501550
&CMetricPopulationModelTest::testMinMaxAndMean));
1551+
suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>(
1552+
"CMetricPopulationModelTest::testVarp", &CMetricPopulationModelTest::testVarp));
15511553
suiteOfTests->addTest(new CppUnit::TestCaller<CMetricPopulationModelTest>(
15521554
"CMetricPopulationModelTest::testComputeProbability",
15531555
&CMetricPopulationModelTest::testComputeProbability));

0 commit comments

Comments
 (0)