Skip to content

Commit 576d86c

Browse files
authored
[ML] Add include categorical filter type to detector rules (#27)
* Add whitelist rule condition type
* Enable BWC (backwards compatibility) for parsing categorical rules
1 parent 7687464 commit 576d86c

8 files changed

+177
-92
lines changed

include/model/CRuleCondition.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class MODEL_EXPORT CRuleCondition {
4343
using TPatternSetCRef = boost::reference_wrapper<const core::CPatternSet>;
4444

4545
public:
46-
enum ERuleConditionType { E_Categorical, E_NumericalActual, E_NumericalTypical, E_NumericalDiffAbs, E_Time };
46+
enum ERuleConditionType { E_CategoricalMatch, E_CategoricalComplement, E_NumericalActual, E_NumericalTypical, E_NumericalDiffAbs, E_Time };
4747

4848
enum EConditionOperator { E_LT, E_LTE, E_GT, E_GTE };
4949

@@ -76,6 +76,8 @@ class MODEL_EXPORT CRuleCondition {
7676
void valueFilter(const core::CPatternSet& valueFilter);
7777

7878
//! Is the condition categorical?
79+
//! Categorical conditions are the pattern-match conditions, i.e.
80+
//! E_CategoricalMatch and E_CategoricalComplement.
7981
bool isCategorical() const;
8082

8183
//! Is the condition numerical?

lib/api/CDetectionRulesJsonParser.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ const std::string TARGET_FIELD_NAME("target_field_name");
3232
const std::string TARGET_FIELD_VALUE("target_field_value");
3333
const std::string TYPE("type");
3434
const std::string CATEGORICAL("categorical");
35+
const std::string CATEGORICAL_MATCH("categorical_match");
36+
const std::string CATEGORICAL_COMPLEMENT("categorical_complement");
3537
const std::string NUMERICAL_ACTUAL("numerical_actual");
3638
const std::string NUMERICAL_TYPICAL("numerical_typical");
3739
const std::string NUMERICAL_DIFF_ABS("numerical_diff_abs");
@@ -237,8 +239,10 @@ bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& r
237239
}
238240

239241
const std::string& type = ruleConditionObject[TYPE.c_str()].GetString();
240-
if (type == CATEGORICAL) {
241-
ruleCondition.type(model::CRuleCondition::E_Categorical);
242+
if (type == CATEGORICAL_MATCH || type == CATEGORICAL){
243+
ruleCondition.type(model::CRuleCondition::E_CategoricalMatch);
244+
} else if (type == CATEGORICAL_COMPLEMENT) {
245+
ruleCondition.type(model::CRuleCondition::E_CategoricalComplement);
242246
} else if (type == NUMERICAL_ACTUAL) {
243247
ruleCondition.type(model::CRuleCondition::E_NumericalActual);
244248
} else if (type == NUMERICAL_TYPICAL) {

lib/api/unittest/CDetectionRulesJsonParserTest.cc

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,19 @@ CppUnit::Test* CDetectionRulesJsonParserTest::suite() {
7676
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules",
7777
&CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules));
7878
suiteOfTests->addTest(
79-
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule",
80-
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule));
79+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule",
80+
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule));
81+
suiteOfTests->addTest(
82+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule",
83+
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule));
8184
suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRulesJsonParserTest>(
8285
"CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule));
8386
suiteOfTests->addTest(
8487
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions",
8588
&CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions));
89+
suiteOfTests->addTest(
90+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule",
91+
&CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule));
8692
return suiteOfTests;
8793
}
8894

@@ -356,8 +362,37 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() {
356362
CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING (id:42) IF ACTUAL < 2.000000"), rules[1].print());
357363
}
358364

359-
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() {
360-
LOG_DEBUG("*** testParseRulesGivenCategoricalRule ***");
365+
// Checks that a rule whose single condition has type categorical_match
// parses successfully and prints as an IN FILTER condition.
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule() {
366+
LOG_DEBUG("*** testParseRulesGivenCategoricalMatchRule ***");
367+
368+
// Register a pattern set under the id filter1; the condition below
// refers to it through its filter_id.
TStrPatternSetUMap filtersById;
369+
core::CPatternSet filter;
370+
filter.initFromJson("[\"b\", \"a\"]");
371+
filtersById["filter1"] = filter;
372+
373+
CDetectionRulesJsonParser parser(filtersById);
374+
CDetectionRulesJsonParser::TDetectionRuleVec rules;
375+
// Build the rules JSON piece by piece: one filter_results rule with a
// single categorical_match condition on field foo.
std::string rulesJson = "[";
376+
rulesJson += "{";
377+
rulesJson += " \"actions\":[\"filter_results\"],";
378+
rulesJson += " \"conditions_connective\":\"or\",";
379+
rulesJson += " \"conditions\": [";
380+
rulesJson += " {\"type\":\"categorical_match\", \"field_name\":\"foo\", \"filter_id\":\"filter1\"}";
381+
rulesJson += " ]";
382+
rulesJson += "}";
383+
rulesJson += "]";
384+
385+
// Parsing must succeed and yield exactly one rule.
CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules));
386+
387+
CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size());
388+
// A match condition prints without a NOT in front of IN FILTER.
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print());
389+
}
390+
391+
void CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule() {
392+
LOG_DEBUG("*** testParseRulesGivenOldStyleCategoricalRule ***");
393+
394+
// Tests that the legacy type name categorical is still accepted
395+
// and is parsed as categorical_match
361396

362397
TStrPatternSetUMap filtersById;
363398
core::CPatternSet filter;
@@ -382,6 +417,32 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() {
382417
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print());
383418
}
384419

420+
// Checks that a rule whose single condition has type categorical_complement
// parses successfully and prints as a NOT IN FILTER condition.
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule() {
421+
LOG_DEBUG("*** testParseRulesGivenCategoricalComplementRule ***");
422+
423+
// Register a pattern set under the id filter1; the condition below
// refers to it through its filter_id.
TStrPatternSetUMap filtersById;
424+
core::CPatternSet filter;
425+
filter.initFromJson("[\"b\", \"a\"]");
426+
filtersById["filter1"] = filter;
427+
428+
CDetectionRulesJsonParser parser(filtersById);
429+
CDetectionRulesJsonParser::TDetectionRuleVec rules;
430+
// Build the rules JSON piece by piece: one filter_results rule with a
// single categorical_complement condition on field foo.
std::string rulesJson = "[";
431+
rulesJson += "{";
432+
rulesJson += " \"actions\":[\"filter_results\"],";
433+
rulesJson += " \"conditions_connective\":\"or\",";
434+
rulesJson += " \"conditions\": [";
435+
rulesJson += " {\"type\":\"categorical_complement\", \"field_name\":\"foo\", \"filter_id\":\"filter1\"}";
436+
rulesJson += " ]";
437+
rulesJson += "}";
438+
rulesJson += "]";
439+
440+
// Parsing must succeed and yield exactly one rule.
CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules));
441+
442+
CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size());
443+
// A complement condition prints with NOT in front of IN FILTER.
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) NOT IN FILTER"), rules[0].print());
444+
}
445+
385446
void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() {
386447
LOG_DEBUG("*** testParseRulesGivenTimeRule ***");
387448

lib/api/unittest/CDetectionRulesJsonParserTest.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ class CDetectionRulesJsonParserTest : public CppUnit::TestFixture {
3434
void testParseRulesGivenNumericalActualRuleWithConnectiveOr();
3535
void testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd();
3636
void testParseRulesGivenMultipleRules();
37-
void testParseRulesGivenCategoricalRule();
37+
void testParseRulesGivenCategoricalMatchRule();
38+
void testParseRulesGivenCategoricalComplementRule();
3839
void testParseRulesGivenTimeRule();
3940
void testParseRulesGivenDifferentActions();
41+
void testParseRulesGivenOldStyleCategoricalRule();
4042
static CppUnit::Test* suite();
4143
};
4244

lib/model/CRuleCondition.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void CRuleCondition::valueFilter(const core::CPatternSet& valueFilter) {
7373
}
7474

7575
bool CRuleCondition::isCategorical() const {
76-
return m_Type == E_Categorical;
76+
return m_Type == E_CategoricalMatch || m_Type == E_CategoricalComplement;
7777
}
7878

7979
bool CRuleCondition::isNumerical() const {
@@ -90,16 +90,19 @@ bool CRuleCondition::test(const CAnomalyDetectorModel& model,
9090
const CDataGatherer& gatherer = model.dataGatherer();
9191

9292
if (this->isCategorical()) {
93+
bool containsValue{false};
9394
if (m_FieldName == gatherer.partitionFieldName()) {
94-
return m_ValueFilter.get().contains(gatherer.partitionFieldValue());
95+
containsValue = m_ValueFilter.get().contains(gatherer.partitionFieldValue());
9596
} else if (m_FieldName == gatherer.personFieldName()) {
96-
return m_ValueFilter.get().contains(gatherer.personName(pid));
97+
containsValue = m_ValueFilter.get().contains(gatherer.personName(pid));
9798
} else if (m_FieldName == gatherer.attributeFieldName()) {
98-
return m_ValueFilter.get().contains(gatherer.attributeName(cid));
99+
containsValue = m_ValueFilter.get().contains(gatherer.attributeName(cid));
99100
} else {
100101
LOG_ERROR("Unexpected fieldName = " << m_FieldName);
101102
return false;
102103
}
104+
105+
return (m_Type == E_CategoricalComplement) ? !containsValue : containsValue;
103106
} else {
104107
if (m_FieldValue.empty() == false) {
105108
if (isScoped) {
@@ -138,7 +141,8 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model,
138141
core_t::TTime time) const {
139142
TDouble1Vec value;
140143
switch (m_Type) {
141-
case E_Categorical: {
144+
case E_CategoricalMatch:
145+
case E_CategoricalComplement: {
142146
LOG_ERROR("Should never check numerical condition for categorical rule condition");
143147
return false;
144148
}
@@ -198,7 +202,11 @@ std::string CRuleCondition::print() const {
198202
result += ")";
199203
}
200204
result += " ";
205+
201206
if (this->isCategorical()) {
207+
if (m_Type == E_CategoricalComplement) {
208+
result += "NOT ";
209+
}
202210
result += "IN FILTER";
203211
} else {
204212
result += this->print(m_Condition.s_Op) + " " + core::CStringUtils::typeToString(m_Condition.s_Threshold);
@@ -208,7 +216,8 @@ std::string CRuleCondition::print() const {
208216

209217
std::string CRuleCondition::print(ERuleConditionType type) const {
210218
switch (type) {
211-
case E_Categorical:
219+
case E_CategoricalMatch:
220+
case E_CategoricalComplement:
212221
return "";
213222
case E_NumericalActual:
214223
return "ACTUAL";

0 commit comments

Comments
 (0)