Skip to content

fix(aci milestone 3): updates to dual writing data conditions #83275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 110 additions & 17 deletions src/sentry/workflow_engine/migration_helpers/alert_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,46 @@ def migrate_metric_action(
return action, data_condition_group_action


def migrate_metric_data_condition(
def migrate_metric_data_conditions(
alert_rule_trigger: AlertRuleTrigger,
) -> tuple[DataCondition, AlertRuleTriggerDataCondition] | None:
) -> tuple[DataCondition, DataCondition, AlertRuleTriggerDataCondition] | None:
alert_rule = alert_rule_trigger.alert_rule
# create a data condition for the Detector's data condition group with the
# threshold and associated priority level
alert_rule_detector = AlertRuleDetector.objects.select_related(
"detector__workflow_condition_group"
).get(alert_rule=alert_rule)
detector = alert_rule_detector.detector
detector_data_condition_group = detector.workflow_condition_group
if detector_data_condition_group is None:
logger.error("workflow_condition_group does not exist", extra={"detector": detector})
return None

threshold_type = (
Condition.GREATER
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.LESS
)

detector_trigger = DataCondition.objects.create(
comparison=alert_rule_trigger.alert_threshold,
condition_result=(
DetectorPriorityLevel.MEDIUM
if alert_rule_trigger.label == "warning"
else DetectorPriorityLevel.HIGH
),
type=threshold_type,
condition_group=detector_data_condition_group,
)

# create an "action filter": if the detector's status matches a certain priority level,
# then the condition result is set to true
data_condition_group = DataConditionGroup.objects.create(
organization_id=alert_rule.organization_id
)
alert_rule_workflow = AlertRuleWorkflow.objects.get(alert_rule=alert_rule)
alert_rule_workflow = AlertRuleWorkflow.objects.select_related("workflow").get(
alert_rule=alert_rule
)
WorkflowDataConditionGroup.objects.create(
condition_group=data_condition_group,
workflow=alert_rule_workflow.workflow,
Expand All @@ -120,13 +151,82 @@ def migrate_metric_data_condition(
alert_rule_trigger_data_condition = AlertRuleTriggerDataCondition.objects.create(
alert_rule_trigger=alert_rule_trigger, data_condition=data_condition
)
return data_condition, alert_rule_trigger_data_condition
return detector_trigger, data_condition, alert_rule_trigger_data_condition
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 it might be better for us to do the detector's data_condition migrations with the detector and the workflow's action filters in another. that's mostly just a nit to have this be a little clearer or help incase we have issues with the migration (like a workflow migrated correctly, but not a detector, we could then reuse the migrate detector method to just migrate the detector and all of it's associated conditions)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could be difficult because of where the dual write methods are called in logic.py? @ceorourke you might have a better sense of whether this is possible.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the serializer first calls create_alert_rule and then later calls create_alert_rule_trigger so we don't have any access to the trigger stuff at the rule creation step



def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondition:
def get_resolve_threshold(detector_data_condition_group: DataConditionGroup) -> float:
"""
Create data conditions for rules with a resolve threshold
Helper method to get the resolve threshold for a Detector if none is specified on
the legacy AlertRule.
"""
detector_triggers = DataCondition.objects.filter(condition_group=detector_data_condition_group)
if detector_triggers.count() > 1:
# there is a warning threshold for this detector, so we should resolve based on it
warning_data_condition = detector_triggers.filter(
condition_result=DetectorPriorityLevel.MEDIUM
).first()
if warning_data_condition is None:
logger.error(
"more than one detector trigger in detector data condition group, but no warning condition exists",
extra={"detector_data_condition_group": detector_triggers},
)
return -1
resolve_threshold = warning_data_condition.comparison
else:
# critical threshold value
critical_data_condition = detector_triggers.first()
if critical_data_condition is None:
logger.error(
"no data conditions exist for detector data condition group",
extra={"detector_data_condition_group": detector_triggers},
)
return -1
resolve_threshold = critical_data_condition.comparison

return resolve_threshold


def migrate_resolve_threshold_data_conditions(
alert_rule: AlertRule,
) -> tuple[DataCondition, DataCondition] | None:
"""
Create data conditions for the old world's "resolve" threshold. If a resolve threshold
has been explicitly set on the alert rule, then use this as our comparison value. Otherwise,
we need to figure out what the resolve threshold is based on the trigger threshold values.
"""
alert_rule_detector = AlertRuleDetector.objects.select_related(
"detector__workflow_condition_group"
).get(alert_rule=alert_rule)
detector = alert_rule_detector.detector
detector_data_condition_group = detector.workflow_condition_group
if detector_data_condition_group is None:
logger.error("workflow_condition_group does not exist", extra={"detector": detector})
return None

# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW
threshold_type = (
Condition.LESS_OR_EQUAL
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.GREATER_OR_EQUAL
)

if alert_rule.resolve_threshold is not None:
resolve_threshold = alert_rule.resolve_threshold
else:
# figure out the resolve threshold ourselves
resolve_threshold = get_resolve_threshold(detector_data_condition_group)
if resolve_threshold == -1:
# something went wrong
return None

detector_trigger = DataCondition.objects.create(
comparison=resolve_threshold,
condition_result=DetectorPriorityLevel.OK,
type=threshold_type,
condition_group=detector_data_condition_group,
)
Comment on lines +223 to +228
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯


data_condition_group = DataConditionGroup.objects.create(
organization_id=alert_rule.organization_id
)
Expand All @@ -135,22 +235,15 @@ def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondi
condition_group=data_condition_group,
workflow=alert_rule_workflow.workflow,
)
threshold_type = (
Condition.LESS
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value
else Condition.GREATER
)
# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW

data_condition = DataCondition.objects.create(
comparison=alert_rule.resolve_threshold,
condition_result=DetectorPriorityLevel.OK,
type=threshold_type,
comparison=DetectorPriorityLevel.OK,
condition_result=True,
type=Condition.ISSUE_PRIORITY_EQUALS,
condition_group=data_condition_group,
)
# XXX: can't make an AlertRuleTriggerDataCondition since this isn't really a trigger
return data_condition
return detector_trigger, data_condition


def create_metric_alert_lookup_tables(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from sentry.deletions.tasks.scheduled import run_scheduled_deletions
from sentry.incidents.grouptype import MetricAlertFire
from sentry.incidents.models.alert_rule import AlertRuleTriggerAction
from sentry.incidents.models.alert_rule import AlertRuleThresholdType, AlertRuleTriggerAction
from sentry.integrations.models.integration import Integration
from sentry.integrations.models.organization_integration import OrganizationIntegration
from sentry.snuba.models import QuerySubscription
Expand All @@ -11,10 +11,11 @@
from sentry.users.services.user.service import user_service
from sentry.workflow_engine.migration_helpers.alert_rule import (
dual_delete_migrated_alert_rule,
get_resolve_threshold,
migrate_alert_rule,
migrate_metric_action,
migrate_metric_data_condition,
migrate_resolve_threshold_data_condition,
migrate_metric_data_conditions,
migrate_resolve_threshold_data_conditions,
)
from sentry.workflow_engine.models import (
Action,
Expand Down Expand Up @@ -194,9 +195,9 @@ def test_create_metric_alert_trigger(self):
Test that when we call the helper methods we create all the ACI models correctly for alert rule triggers
"""
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_condition(self.alert_rule_trigger_warning)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_condition(self.metric_alert)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrate_resolve_threshold_data_conditions(self.metric_alert)

assert (
AlertRuleTriggerDataCondition.objects.filter(
Expand All @@ -207,12 +208,38 @@ def test_create_metric_alert_trigger(self):
).count()
== 2
)
detector_triggers = DataCondition.objects.filter(
comparison__in=[
self.alert_rule_trigger_warning.alert_threshold,
self.alert_rule_trigger_critical.alert_threshold,
self.metric_alert.resolve_threshold,
]
)

assert len(detector_triggers) == 3
detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector

warning_detector_trigger = detector_triggers[0]
critical_detector_trigger = detector_triggers[1]
resolve_detector_trigger = detector_triggers[2]

assert warning_detector_trigger.type == Condition.GREATER
assert warning_detector_trigger.condition_result == DetectorPriorityLevel.MEDIUM
assert warning_detector_trigger.condition_group == detector.workflow_condition_group

assert critical_detector_trigger.type == Condition.GREATER
assert critical_detector_trigger.condition_result == DetectorPriorityLevel.HIGH
assert critical_detector_trigger.condition_group == detector.workflow_condition_group

assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

data_conditions = DataCondition.objects.filter(
comparison__in=[
DetectorPriorityLevel.MEDIUM,
DetectorPriorityLevel.HIGH,
self.metric_alert.resolve_threshold,
DetectorPriorityLevel.OK,
]
)
assert len(data_conditions) == 3
Expand All @@ -236,19 +263,94 @@ def test_create_metric_alert_trigger(self):
condition_group=critical_data_condition.condition_group
).exists()

assert resolve_data_condition.type == Condition.LESS
assert resolve_data_condition.comparison == self.metric_alert.resolve_threshold
assert resolve_data_condition.condition_result == DetectorPriorityLevel.OK
assert resolve_data_condition.type == Condition.ISSUE_PRIORITY_EQUALS
assert resolve_data_condition.comparison == DetectorPriorityLevel.OK
assert resolve_data_condition.condition_result is True
assert resolve_data_condition.condition_group == resolve_data_condition.condition_group
assert WorkflowDataConditionGroup.objects.filter(
condition_group=resolve_data_condition.condition_group
).exists()

def test_calculate_resolve_threshold_critical_only(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector
detector_dcg = detector.workflow_condition_group
assert detector_dcg
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_critical.alert_threshold

def test_calculate_resolve_threshold_with_warning(self):
migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

detector = AlertRuleDetector.objects.get(alert_rule=self.metric_alert).detector
detector_dcg = detector.workflow_condition_group
assert detector_dcg
resolve_threshold = get_resolve_threshold(detector_dcg)
assert resolve_threshold == self.alert_rule_trigger_warning.alert_threshold

def create_metric_alert_trigger_auto_resolve(self):
"""
Test that we create the correct resolution DataConditions when an AlertRule has no explicit resolve threshold
"""
metric_alert = self.create_alert_rule()
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert resolve_detector_trigger.type == Condition.LESS_OR_EQUAL
assert resolve_detector_trigger.comparison == critical_trigger.alert_threshold
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

resolve_data_condition = DataCondition.objects.get(comparison=DetectorPriorityLevel.OK)

assert resolve_data_condition.type == Condition.ISSUE_PRIORITY_EQUALS
assert resolve_data_condition.condition_result is True
assert resolve_data_condition.condition_group == resolve_data_condition.condition_group
assert WorkflowDataConditionGroup.objects.filter(
condition_group=resolve_data_condition.condition_group
).exists()

def create_metric_alert_trigger_auto_resolve_less_than(self):
"""
Test that we assign the resolve detector trigger the correct type if the threshold type is ABOVE
"""
metric_alert = self.create_alert_rule(threshold_type=AlertRuleThresholdType.ABOVE)
critical_trigger = self.create_alert_rule_trigger(alert_rule=metric_alert, label="critical")

migrate_alert_rule(metric_alert, self.rpc_user)
migrate_metric_data_conditions(critical_trigger)

detector = AlertRuleDetector.objects.get(alert_rule=metric_alert).detector

resolve_detector_trigger = DataCondition.objects.get(
condition_result=DetectorPriorityLevel.OK
)

assert resolve_detector_trigger.type == Condition.GREATER_OR_EQUAL
assert resolve_detector_trigger.comparison == critical_trigger.alert_threshold
assert resolve_detector_trigger.condition_result == DetectorPriorityLevel.OK
assert resolve_detector_trigger.condition_group == detector.workflow_condition_group

def test_create_metric_alert_trigger_action(self):
"""
Test that when we call the helper methods we create all the ACI models correctly for alert rule trigger actions
"""
migrate_alert_rule(self.metric_alert, self.rpc_user)

migrate_metric_data_condition(self.alert_rule_trigger_warning)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_metric_data_conditions(self.alert_rule_trigger_warning)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)

migrate_metric_action(self.alert_rule_trigger_action_email)
migrate_metric_action(self.alert_rule_trigger_action_integration)
Expand Down Expand Up @@ -286,7 +388,7 @@ def test_create_metric_alert_trigger_action_no_alert_rule_trigger_data_condition
other_alert_rule_trigger = self.create_alert_rule_trigger(alert_rule=other_metric_alert)

migrate_alert_rule(other_metric_alert, self.rpc_user)
migrate_metric_data_condition(other_alert_rule_trigger)
migrate_metric_data_conditions(other_alert_rule_trigger)
migrated_action = migrate_metric_action(self.alert_rule_trigger_action_email)
assert migrated_action is None
mock_logger.exception.assert_called_with(
Expand All @@ -306,7 +408,7 @@ def test_create_metric_alert_trigger_action_no_type(self, mock_logger):
self.integration.save()

migrate_alert_rule(self.metric_alert, self.rpc_user)
migrate_metric_data_condition(self.alert_rule_trigger_critical)
migrate_metric_data_conditions(self.alert_rule_trigger_critical)
migrated = migrate_metric_action(self.alert_rule_trigger_action_integration)
assert migrated is None
mock_logger.warning.assert_called_with(
Expand Down
Loading