Skip to content

Commit 6bd1a53

Browse files
authored
[ML] Add include categorical filter type to detector rules (#27)
* Add whitelist rule condition type
* Enable backwards compatibility (BWC) for parsing categorical rules
1 parent e8f9bc9 commit 6bd1a53

8 files changed

+177
-92
lines changed

include/model/CRuleCondition.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class MODEL_EXPORT CRuleCondition {
3434
using TPatternSetCRef = boost::reference_wrapper<const core::CPatternSet>;
3535

3636
public:
37-
enum ERuleConditionType { E_Categorical, E_NumericalActual, E_NumericalTypical, E_NumericalDiffAbs, E_Time };
37+
enum ERuleConditionType { E_CategoricalMatch, E_CategoricalComplement, E_NumericalActual, E_NumericalTypical, E_NumericalDiffAbs, E_Time };
3838

3939
enum EConditionOperator { E_LT, E_LTE, E_GT, E_GTE };
4040

@@ -67,6 +67,8 @@ class MODEL_EXPORT CRuleCondition {
6767
void valueFilter(const core::CPatternSet& valueFilter);
6868

6969
//! Is the condition categorical?
70+
//! Categorical conditions are pattern match conditions, i.e.
71+
//! E_CategoricalMatch and E_CategoricalComplement.
7072
bool isCategorical() const;
7173

7274
//! Is the condition numerical?

lib/api/CDetectionRulesJsonParser.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ const std::string TARGET_FIELD_NAME("target_field_name");
2323
const std::string TARGET_FIELD_VALUE("target_field_value");
2424
const std::string TYPE("type");
2525
const std::string CATEGORICAL("categorical");
26+
const std::string CATEGORICAL_MATCH("categorical_match");
27+
const std::string CATEGORICAL_COMPLEMENT("categorical_complement");
2628
const std::string NUMERICAL_ACTUAL("numerical_actual");
2729
const std::string NUMERICAL_TYPICAL("numerical_typical");
2830
const std::string NUMERICAL_DIFF_ABS("numerical_diff_abs");
@@ -228,8 +230,10 @@ bool CDetectionRulesJsonParser::parseRuleConditionType(const rapidjson::Value& r
228230
}
229231

230232
const std::string& type = ruleConditionObject[TYPE.c_str()].GetString();
231-
if (type == CATEGORICAL) {
232-
ruleCondition.type(model::CRuleCondition::E_Categorical);
233+
if (type == CATEGORICAL_MATCH || type == CATEGORICAL){
234+
ruleCondition.type(model::CRuleCondition::E_CategoricalMatch);
235+
} else if (type == CATEGORICAL_COMPLEMENT) {
236+
ruleCondition.type(model::CRuleCondition::E_CategoricalComplement);
233237
} else if (type == NUMERICAL_ACTUAL) {
234238
ruleCondition.type(model::CRuleCondition::E_NumericalActual);
235239
} else if (type == NUMERICAL_TYPICAL) {

lib/api/unittest/CDetectionRulesJsonParserTest.cc

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,19 @@ CppUnit::Test* CDetectionRulesJsonParserTest::suite() {
6767
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules",
6868
&CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules));
6969
suiteOfTests->addTest(
70-
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule",
71-
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule));
70+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule",
71+
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule));
72+
suiteOfTests->addTest(
73+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule",
74+
&CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule));
7275
suiteOfTests->addTest(new CppUnit::TestCaller<CDetectionRulesJsonParserTest>(
7376
"CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule", &CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule));
7477
suiteOfTests->addTest(
7578
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions",
7679
&CDetectionRulesJsonParserTest::testParseRulesGivenDifferentActions));
80+
suiteOfTests->addTest(
81+
new CppUnit::TestCaller<CDetectionRulesJsonParserTest>("CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule",
82+
&CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule));
7783
return suiteOfTests;
7884
}
7985

@@ -347,8 +353,37 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenMultipleRules() {
347353
CPPUNIT_ASSERT_EQUAL(std::string("SKIP_SAMPLING (id:42) IF ACTUAL < 2.000000"), rules[1].print());
348354
}
349355

350-
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() {
351-
LOG_DEBUG("*** testParseRulesGivenCategoricalRule ***");
356+
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalMatchRule() {
357+
LOG_DEBUG("*** testParseRulesGivenCategoricalMatchRule ***");
358+
359+
TStrPatternSetUMap filtersById;
360+
core::CPatternSet filter;
361+
filter.initFromJson("[\"b\", \"a\"]");
362+
filtersById["filter1"] = filter;
363+
364+
CDetectionRulesJsonParser parser(filtersById);
365+
CDetectionRulesJsonParser::TDetectionRuleVec rules;
366+
std::string rulesJson = "[";
367+
rulesJson += "{";
368+
rulesJson += " \"actions\":[\"filter_results\"],";
369+
rulesJson += " \"conditions_connective\":\"or\",";
370+
rulesJson += " \"conditions\": [";
371+
rulesJson += " {\"type\":\"categorical_match\", \"field_name\":\"foo\", \"filter_id\":\"filter1\"}";
372+
rulesJson += " ]";
373+
rulesJson += "}";
374+
rulesJson += "]";
375+
376+
CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules));
377+
378+
CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size());
379+
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print());
380+
}
381+
382+
void CDetectionRulesJsonParserTest::testParseRulesGivenOldStyleCategoricalRule() {
383+
LOG_DEBUG("*** testParseRulesGivenOldStyleCategoricalRule ***");
384+
385+
// Tests that the old-style rule type "categorical" is still accepted
386+
// and parsed as categorical_match (backwards compatibility)
352387

353388
TStrPatternSetUMap filtersById;
354389
core::CPatternSet filter;
@@ -373,6 +408,32 @@ void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalRule() {
373408
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) IN FILTER"), rules[0].print());
374409
}
375410

411+
void CDetectionRulesJsonParserTest::testParseRulesGivenCategoricalComplementRule() {
412+
LOG_DEBUG("*** testParseRulesGivenCategoricalComplementRule ***");
413+
414+
TStrPatternSetUMap filtersById;
415+
core::CPatternSet filter;
416+
filter.initFromJson("[\"b\", \"a\"]");
417+
filtersById["filter1"] = filter;
418+
419+
CDetectionRulesJsonParser parser(filtersById);
420+
CDetectionRulesJsonParser::TDetectionRuleVec rules;
421+
std::string rulesJson = "[";
422+
rulesJson += "{";
423+
rulesJson += " \"actions\":[\"filter_results\"],";
424+
rulesJson += " \"conditions_connective\":\"or\",";
425+
rulesJson += " \"conditions\": [";
426+
rulesJson += " {\"type\":\"categorical_complement\", \"field_name\":\"foo\", \"filter_id\":\"filter1\"}";
427+
rulesJson += " ]";
428+
rulesJson += "}";
429+
rulesJson += "]";
430+
431+
CPPUNIT_ASSERT(parser.parseRules(rulesJson, rules));
432+
433+
CPPUNIT_ASSERT_EQUAL(std::size_t(1), rules.size());
434+
CPPUNIT_ASSERT_EQUAL(std::string("FILTER_RESULTS IF (foo) NOT IN FILTER"), rules[0].print());
435+
}
436+
376437
void CDetectionRulesJsonParserTest::testParseRulesGivenTimeRule() {
377438
LOG_DEBUG("*** testParseRulesGivenTimeRule ***");
378439

lib/api/unittest/CDetectionRulesJsonParserTest.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ class CDetectionRulesJsonParserTest : public CppUnit::TestFixture {
2525
void testParseRulesGivenNumericalActualRuleWithConnectiveOr();
2626
void testParseRulesGivenNumericalTypicalAndDiffAbsRuleWithConnectiveAnd();
2727
void testParseRulesGivenMultipleRules();
28-
void testParseRulesGivenCategoricalRule();
28+
void testParseRulesGivenCategoricalMatchRule();
29+
void testParseRulesGivenCategoricalComplementRule();
2930
void testParseRulesGivenTimeRule();
3031
void testParseRulesGivenDifferentActions();
32+
void testParseRulesGivenOldStyleCategoricalRule();
3133
static CppUnit::Test* suite();
3234
};
3335

lib/model/CRuleCondition.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ void CRuleCondition::valueFilter(const core::CPatternSet& valueFilter) {
6464
}
6565

6666
bool CRuleCondition::isCategorical() const {
67-
return m_Type == E_Categorical;
67+
return m_Type == E_CategoricalMatch || m_Type == E_CategoricalComplement;
6868
}
6969

7070
bool CRuleCondition::isNumerical() const {
@@ -81,16 +81,19 @@ bool CRuleCondition::test(const CAnomalyDetectorModel& model,
8181
const CDataGatherer& gatherer = model.dataGatherer();
8282

8383
if (this->isCategorical()) {
84+
bool containsValue{false};
8485
if (m_FieldName == gatherer.partitionFieldName()) {
85-
return m_ValueFilter.get().contains(gatherer.partitionFieldValue());
86+
containsValue = m_ValueFilter.get().contains(gatherer.partitionFieldValue());
8687
} else if (m_FieldName == gatherer.personFieldName()) {
87-
return m_ValueFilter.get().contains(gatherer.personName(pid));
88+
containsValue = m_ValueFilter.get().contains(gatherer.personName(pid));
8889
} else if (m_FieldName == gatherer.attributeFieldName()) {
89-
return m_ValueFilter.get().contains(gatherer.attributeName(cid));
90+
containsValue = m_ValueFilter.get().contains(gatherer.attributeName(cid));
9091
} else {
9192
LOG_ERROR("Unexpected fieldName = " << m_FieldName);
9293
return false;
9394
}
95+
96+
return (m_Type == E_CategoricalComplement) ? !containsValue : containsValue;
9497
} else {
9598
if (m_FieldValue.empty() == false) {
9699
if (isScoped) {
@@ -129,7 +132,8 @@ bool CRuleCondition::checkCondition(const CAnomalyDetectorModel& model,
129132
core_t::TTime time) const {
130133
TDouble1Vec value;
131134
switch (m_Type) {
132-
case E_Categorical: {
135+
case E_CategoricalMatch:
136+
case E_CategoricalComplement: {
133137
LOG_ERROR("Should never check numerical condition for categorical rule condition");
134138
return false;
135139
}
@@ -189,7 +193,11 @@ std::string CRuleCondition::print() const {
189193
result += ")";
190194
}
191195
result += " ";
196+
192197
if (this->isCategorical()) {
198+
if (m_Type == E_CategoricalComplement) {
199+
result += "NOT ";
200+
}
193201
result += "IN FILTER";
194202
} else {
195203
result += this->print(m_Condition.s_Op) + " " + core::CStringUtils::typeToString(m_Condition.s_Threshold);
@@ -199,7 +207,8 @@ std::string CRuleCondition::print() const {
199207

200208
std::string CRuleCondition::print(ERuleConditionType type) const {
201209
switch (type) {
202-
case E_Categorical:
210+
case E_CategoricalMatch:
211+
case E_CategoricalComplement:
203212
return "";
204213
case E_NumericalActual:
205214
return "ACTUAL";

0 commit comments

Comments
 (0)