Skip to content

Commit df437ae

Browse files
authored
Merge pull request #387 from splunk/feature/risk-model-validation
Adding risk data model validation to integration testing
2 parents fc27ea2 + 60bef93 commit df437ae

File tree

5 files changed

+199
-49
lines changed

5 files changed

+199
-49
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from abc import ABC, abstractmethod
2+
3+
from pydantic import BaseModel, ConfigDict
4+
5+
from contentctl.objects.detection import Detection
6+
7+
8+
class BaseSecurityEvent(BaseModel, ABC):
9+
"""
10+
Base event class for a Splunk security event (e.g. risks and notables)
11+
"""
12+
13+
# The search name (e.g. "ESCU - Windows Modify Registry EnableLinkedConnections - Rule")
14+
search_name: str
15+
16+
# The search ID that found that generated this event
17+
orig_sid: str
18+
19+
# Allowing fields that aren't explicitly defined to be passed since some of the risk/notable
20+
# event's fields vary depending on the SPL which generated them
21+
model_config = ConfigDict(extra="allow")
22+
23+
@abstractmethod
24+
def validate_against_detection(self, detection: Detection) -> None:
25+
"""
26+
Validate this risk/notable event against the given detection
27+
"""
28+
raise NotImplementedError()

contentctl/objects/correlation_search.py

Lines changed: 161 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
format_pbar_string, # type: ignore
1919
)
2020
from contentctl.helper.utils import Utils
21+
from contentctl.objects.base_security_event import BaseSecurityEvent
2122
from contentctl.objects.base_test_result import TestResultStatus
2223
from contentctl.objects.detection import Detection
2324
from contentctl.objects.errors import (
@@ -222,6 +223,9 @@ class CorrelationSearch(BaseModel):
222223
# The list of risk events found
223224
_risk_events: list[RiskEvent] | None = PrivateAttr(default=None)
224225

226+
# The list of risk data model events found
227+
_risk_dm_events: list[BaseSecurityEvent] | None = PrivateAttr(default=None)
228+
225229
# The list of notable events found
226230
_notable_events: list[NotableEvent] | None = PrivateAttr(default=None)
227231

@@ -554,6 +558,13 @@ def get_risk_events(self, force_update: bool = False) -> list[RiskEvent]:
554558
raise
555559
events.append(event)
556560
self.logger.debug(f"Found risk event for '{self.name}': {event}")
561+
else:
562+
msg = (
563+
f"Found event for unexpected index ({result['index']}) in our query "
564+
f"results (expected {Indexes.RISK_INDEX})"
565+
)
566+
self.logger.error(msg)
567+
raise ValueError(msg)
557568
except ServerError as e:
558569
self.logger.error(f"Error returned from Splunk instance: {e}")
559570
raise e
@@ -623,6 +634,13 @@ def get_notable_events(self, force_update: bool = False) -> list[NotableEvent]:
623634
raise
624635
events.append(event)
625636
self.logger.debug(f"Found notable event for '{self.name}': {event}")
637+
else:
638+
msg = (
639+
f"Found event for unexpected index ({result['index']}) in our query "
640+
f"results (expected {Indexes.NOTABLE_INDEX})"
641+
)
642+
self.logger.error(msg)
643+
raise ValueError(msg)
626644
except ServerError as e:
627645
self.logger.error(f"Error returned from Splunk instance: {e}")
628646
raise e
@@ -637,15 +655,119 @@ def get_notable_events(self, force_update: bool = False) -> list[NotableEvent]:
637655

638656
return events
639657

658+
def risk_dm_event_exists(self) -> bool:
659+
"""Whether at least one matching risk data model event exists
660+
661+
Queries the `risk` data model and returns True if at least one matching event (could come
662+
from risk or notable index) exists for this search
663+
:return: a bool indicating whether a risk data model event for this search exists in the
664+
risk data model
665+
"""
666+
# We always force an update on the cache when checking if events exist
667+
events = self.get_risk_dm_events(force_update=True)
668+
return len(events) > 0
669+
670+
def get_risk_dm_events(self, force_update: bool = False) -> list[BaseSecurityEvent]:
671+
"""Get risk data model events from the Splunk instance
672+
673+
Queries the `risk` data model and returns any matching events (could come from risk or
674+
notable index)
675+
:param force_update: whether the cached _risk_events should be forcibly updated if already
676+
set
677+
:return: a list of risk events
678+
"""
679+
# Reset the list of risk data model events if we're forcing an update
680+
if force_update:
681+
self.logger.debug("Resetting risk data model event cache.")
682+
self._risk_dm_events = None
683+
684+
# Use the cached risk_dm_events unless we're forcing an update
685+
if self._risk_dm_events is not None:
686+
self.logger.debug(
687+
f"Using cached risk data model events ({len(self._risk_dm_events)} total)."
688+
)
689+
return self._risk_dm_events
690+
691+
# TODO (#248): Refactor risk/notable querying to pin to a single savedsearch ID
692+
# Search for all risk data model events from a single scheduled search (indicated by
693+
# orig_sid)
694+
query = (
695+
f'datamodel Risk All_Risk flat | search search_name="{self.name}" [datamodel Risk '
696+
f'All_Risk flat | search search_name="{self.name}" | tail 1 | fields orig_sid] '
697+
"| tojson"
698+
)
699+
result_iterator = self._search(query)
700+
701+
# Iterate over the events, storing them in a list and checking for any errors
702+
events: list[BaseSecurityEvent] = []
703+
risk_count = 0
704+
notable_count = 0
705+
try:
706+
for result in result_iterator:
707+
# sanity check that this result from the iterator is a risk event and not some
708+
# other metadata
709+
if result["index"] == Indexes.RISK_INDEX:
710+
try:
711+
parsed_raw = json.loads(result["_raw"])
712+
event = RiskEvent.model_validate(parsed_raw)
713+
except Exception:
714+
self.logger.error(
715+
f"Failed to parse RiskEvent from search result: {result}"
716+
)
717+
raise
718+
events.append(event)
719+
risk_count += 1
720+
self.logger.debug(
721+
f"Found risk event in risk data model for '{self.name}': {event}"
722+
)
723+
elif result["index"] == Indexes.NOTABLE_INDEX:
724+
try:
725+
parsed_raw = json.loads(result["_raw"])
726+
event = NotableEvent.model_validate(parsed_raw)
727+
except Exception:
728+
self.logger.error(
729+
f"Failed to parse NotableEvent from search result: {result}"
730+
)
731+
raise
732+
events.append(event)
733+
notable_count += 1
734+
self.logger.debug(
735+
f"Found notable event in risk data model for '{self.name}': {event}"
736+
)
737+
else:
738+
msg = (
739+
f"Found event for unexpected index ({result['index']}) in our query "
740+
f"results (expected {Indexes.NOTABLE_INDEX} or {Indexes.RISK_INDEX})"
741+
)
742+
self.logger.error(msg)
743+
raise ValueError(msg)
744+
except ServerError as e:
745+
self.logger.error(f"Error returned from Splunk instance: {e}")
746+
raise e
747+
748+
# Log if no events were found
749+
if len(events) < 1:
750+
self.logger.debug(f"No events found in risk data model for '{self.name}'")
751+
else:
752+
# Set the cache if we found events
753+
self._risk_dm_events = events
754+
self.logger.debug(
755+
f"Caching {len(self._risk_dm_events)} risk data model events."
756+
)
757+
758+
# Log counts of risk and notable events found
759+
self.logger.debug(
760+
f"Found {risk_count} risk events and {notable_count} notable events in the risk data "
761+
"model"
762+
)
763+
764+
return events
765+
640766
def validate_risk_events(self) -> None:
641767
"""Validates the existence of any expected risk events
642768
643769
First ensure the risk event exists, and if it does validate its risk message and make sure
644-
any events align with the specified risk object. Also adds the risk index to the purge list
645-
if risk events existed
646-
:param elapsed_sleep_time: an int representing the amount of time slept thus far waiting to
647-
check the risks/notables
648-
:returns: an IntegrationTestResult on failure; None on success
770+
any events align with the specified risk object.
649771
"""
650772
# Ensure the rba object is defined
651773
if self.detection.rba is None:
@@ -735,13 +857,29 @@ def validate_risk_events(self) -> None:
735857
def validate_notable_events(self) -> None:
736858
"""Validates the existence of any expected notables
737859
738-
Ensures the notable exists. Also adds the notable index to the purge list if notables
739-
existed
740-
:param elapsed_sleep_time: an int representing the amount of time slept thus far waiting to
741-
check the risks/notables
742-
:returns: an IntegrationTestResult on failure; None on success
860+
Check various fields within the notable to ensure alignment with the detection definition.
861+
Additionally, ensure that the notable does not appear in the risk data model, as this is
862+
currently undesired behavior for ESCU detections.
863+
"""
864+
if self.notable_in_risk_dm():
865+
raise ValidationFailed(
866+
"One or more notables appeared in the risk data model. This could lead to risk "
867+
"score doubling, and/or notable multiplexing, depending on the detection type "
868+
"(e.g. TTP), or the number of risk modifiers."
869+
)
870+
871+
def notable_in_risk_dm(self) -> bool:
872+
"""Check if notables are in the risk data model
873+
874+
Returns a bool indicating whether notables are in the risk data model or not.
875+
876+
:returns: a bool, True if notables are in the risk data model results; False if not
743877
"""
744-
raise NotImplementedError()
878+
if self.risk_dm_event_exists():
879+
for event in self.get_risk_dm_events():
880+
if isinstance(event, NotableEvent):
881+
return True
882+
return False
745883

746884
# NOTE: it would be more ideal to switch this to a system which gets the handle of the saved search job and polls
747885
# it for completion, but that seems more tricky
@@ -828,8 +966,8 @@ def test(
828966

829967
try:
830968
# Validate risk events
831-
self.logger.debug("Checking for matching risk events")
832969
if self.has_risk_analysis_action:
970+
self.logger.debug("Checking for matching risk events")
833971
if self.risk_event_exists():
834972
# TODO (PEX-435): should this in the retry loop? or outside it?
835973
# -> I've observed there being a missing risk event (15/16) on
@@ -846,22 +984,28 @@ def test(
846984
raise ValidationFailed(
847985
f"TEST FAILED: No matching risk event created for: {self.name}"
848986
)
987+
else:
988+
self.logger.debug(
989+
f"No risk action defined for '{self.name}'"
990+
)
849991

850992
# Validate notable events
851-
self.logger.debug("Checking for matching notable events")
852993
if self.has_notable_action:
994+
self.logger.debug("Checking for matching notable events")
853995
# NOTE: because we check this last, if both fail, the error message about notables will
854996
# always be the last to be added and thus the one surfaced to the user
855997
if self.notable_event_exists():
856998
# TODO (PEX-435): should this in the retry loop? or outside it?
857-
# TODO (PEX-434): implement deeper notable validation (the method
858-
# commented out below is unimplemented)
859-
# self.validate_notable_events(elapsed_sleep_time)
999+
self.validate_notable_events()
8601000
pass
8611001
else:
8621002
raise ValidationFailed(
8631003
f"TEST FAILED: No matching notable event created for: {self.name}"
8641004
)
1005+
else:
1006+
self.logger.debug(
1007+
f"No notable action defined for '{self.name}'"
1008+
)
8651009
except ValidationFailed as e:
8661010
self.logger.error(f"Risk/notable validation failed: {e}")
8671011
result = IntegrationTestResult(
@@ -1015,6 +1159,7 @@ def cleanup(self, delete_test_index: bool = False) -> None:
10151159
# reset caches
10161160
self._risk_events = None
10171161
self._notable_events = None
1162+
self._risk_dm_events = None
10181163

10191164
def update_pbar(self, state: str) -> str:
10201165
"""

contentctl/objects/notable_event.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
1-
from pydantic import ConfigDict, BaseModel
2-
1+
from contentctl.objects.base_security_event import BaseSecurityEvent
32
from contentctl.objects.detection import Detection
43

54

6-
# TODO (PEX-434): implement deeper notable validation
7-
class NotableEvent(BaseModel):
8-
# The search name (e.g. "ESCU - Windows Modify Registry EnableLinkedConnections - Rule")
9-
search_name: str
10-
11-
# The search ID that found that generated this risk event
12-
orig_sid: str
13-
14-
# Allowing fields that aren't explicitly defined to be passed since some of the risk event's
15-
# fields vary depending on the SPL which generated them
16-
model_config = ConfigDict(extra="allow")
5+
class NotableEvent(BaseSecurityEvent):
6+
# TODO (PEX-434): implement deeper notable validation
177

188
def validate_against_detection(self, detection: Detection) -> None:
9+
"""
10+
Validate this risk/notable event against the given detection
11+
"""
1912
raise NotImplementedError()

contentctl/objects/risk_event.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,17 @@
11
import re
22
from functools import cached_property
33

4-
from pydantic import (
5-
BaseModel,
6-
ConfigDict,
7-
Field,
8-
PrivateAttr,
9-
computed_field,
10-
field_validator,
11-
)
4+
from pydantic import Field, PrivateAttr, computed_field, field_validator
125

6+
from contentctl.objects.base_security_event import BaseSecurityEvent
137
from contentctl.objects.detection import Detection
148
from contentctl.objects.errors import ValidationFailed
159
from contentctl.objects.rba import RiskObject
1610

1711

18-
class RiskEvent(BaseModel):
12+
class RiskEvent(BaseSecurityEvent):
1913
"""Model for risk event in ES"""
2014

21-
# The search name (e.g. "ESCU - Windows Modify Registry EnableLinkedConnections - Rule")
22-
search_name: str
23-
2415
# The subject of the risk event (e.g. a username, process name, system name, account ID, etc.)
2516
# (not to be confused w/ the risk object from the detection)
2617
es_risk_object: int | str = Field(alias="risk_object")
@@ -32,9 +23,6 @@ class RiskEvent(BaseModel):
3223
# The level of risk associated w/ the risk event
3324
risk_score: int
3425

35-
# The search ID that found that generated this risk event
36-
orig_sid: str
37-
3826
# The message for the risk event
3927
risk_message: str
4028

@@ -53,10 +41,6 @@ class RiskEvent(BaseModel):
5341
# Private attribute caching the risk object this RiskEvent is mapped to
5442
_matched_risk_object: RiskObject | None = PrivateAttr(default=None)
5543

56-
# Allowing fields that aren't explicitly defined to be passed since some of the risk event's
57-
# fields vary depending on the SPL which generated them
58-
model_config = ConfigDict(extra="allow")
59-
6044
@field_validator("annotations_mitre_attack", "analyticstories", mode="before")
6145
@classmethod
6246
def _convert_str_value_to_singleton(cls, v: str | list[str]) -> list[str]:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[tool.poetry]
22
name = "contentctl"
33

4-
version = "5.3.2"
4+
version = "5.4.0"
55

66
description = "Splunk Content Control Tool"
77
authors = ["STRT <research@splunk.com>"]

0 commit comments

Comments
 (0)