-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
fix(aci milestone 3): updates to dual writing data conditions #83275
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
fbf1935
add detector triggers to the dual write
mifu67 73350cb
existing test update
mifu67 a53b0a3
test get_resolve_threshold
mifu67 12a469d
okay final tests
mifu67 4dd9712
select_related zoom zoom
mifu67 56d2c13
Merge branch 'master' into mifu67/aci/dual-write-detector-dcg
mifu67 6fbff49
one mypy fix
mifu67 ef7fa35
typing
mifu67 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,15 +94,46 @@ def migrate_metric_action( | |
return action, data_condition_group_action | ||
|
||
|
||
def migrate_metric_data_condition( | ||
def migrate_metric_data_conditions( | ||
alert_rule_trigger: AlertRuleTrigger, | ||
) -> tuple[DataCondition, AlertRuleTriggerDataCondition] | None: | ||
) -> tuple[DataCondition, DataCondition, AlertRuleTriggerDataCondition] | None: | ||
alert_rule = alert_rule_trigger.alert_rule | ||
# create a data condition for the Detector's data condition group with the | ||
# threshold and associated priority level | ||
alert_rule_detector = AlertRuleDetector.objects.select_related( | ||
"detector__workflow_condition_group" | ||
).get(alert_rule=alert_rule) | ||
detector = alert_rule_detector.detector | ||
detector_data_condition_group = detector.workflow_condition_group | ||
if detector_data_condition_group is None: | ||
logger.error("workflow_condition_group does not exist", extra={"detector": detector}) | ||
return None | ||
|
||
threshold_type = ( | ||
Condition.GREATER | ||
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value | ||
else Condition.LESS | ||
) | ||
|
||
detector_trigger = DataCondition.objects.create( | ||
comparison=alert_rule_trigger.alert_threshold, | ||
condition_result=( | ||
DetectorPriorityLevel.MEDIUM | ||
if alert_rule_trigger.label == "warning" | ||
else DetectorPriorityLevel.HIGH | ||
), | ||
type=threshold_type, | ||
condition_group=detector_data_condition_group, | ||
) | ||
|
||
# create an "action filter": if the detector's status matches a certain priority level, | ||
# then the condition result is set to true | ||
data_condition_group = DataConditionGroup.objects.create( | ||
organization_id=alert_rule.organization_id | ||
) | ||
alert_rule_workflow = AlertRuleWorkflow.objects.get(alert_rule=alert_rule) | ||
alert_rule_workflow = AlertRuleWorkflow.objects.select_related("workflow").get( | ||
alert_rule=alert_rule | ||
) | ||
WorkflowDataConditionGroup.objects.create( | ||
condition_group=data_condition_group, | ||
workflow=alert_rule_workflow.workflow, | ||
|
@@ -120,13 +151,82 @@ def migrate_metric_data_condition( | |
alert_rule_trigger_data_condition = AlertRuleTriggerDataCondition.objects.create( | ||
alert_rule_trigger=alert_rule_trigger, data_condition=data_condition | ||
) | ||
return data_condition, alert_rule_trigger_data_condition | ||
return detector_trigger, data_condition, alert_rule_trigger_data_condition | ||
|
||
|
||
def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondition: | ||
def get_resolve_threshold(detector_data_condition_group: DataConditionGroup) -> float: | ||
""" | ||
Create data conditions for rules with a resolve threshold | ||
Helper method to get the resolve threshold for a Detector if none is specified on | ||
the legacy AlertRule. | ||
""" | ||
detector_triggers = DataCondition.objects.filter(condition_group=detector_data_condition_group) | ||
if detector_triggers.count() > 1: | ||
# there is a warning threshold for this detector, so we should resolve based on it | ||
warning_data_condition = detector_triggers.filter( | ||
condition_result=DetectorPriorityLevel.MEDIUM | ||
).first() | ||
if warning_data_condition is None: | ||
logger.error( | ||
"more than one detector trigger in detector data condition group, but no warning condition exists", | ||
extra={"detector_data_condition_group": detector_triggers}, | ||
) | ||
return -1 | ||
resolve_threshold = warning_data_condition.comparison | ||
else: | ||
# critical threshold value | ||
critical_data_condition = detector_triggers.first() | ||
if critical_data_condition is None: | ||
logger.error( | ||
"no data conditions exist for detector data condition group", | ||
extra={"detector_data_condition_group": detector_triggers}, | ||
) | ||
return -1 | ||
resolve_threshold = critical_data_condition.comparison | ||
|
||
return resolve_threshold | ||
|
||
|
||
def migrate_resolve_threshold_data_conditions( | ||
alert_rule: AlertRule, | ||
) -> tuple[DataCondition, DataCondition] | None: | ||
""" | ||
Create data conditions for the old world's "resolve" threshold. If a resolve threshold | ||
has been explicitly set on the alert rule, then use this as our comparison value. Otherwise, | ||
we need to figure out what the resolve threshold is based on the trigger threshold values. | ||
""" | ||
alert_rule_detector = AlertRuleDetector.objects.select_related( | ||
"detector__workflow_condition_group" | ||
).get(alert_rule=alert_rule) | ||
detector = alert_rule_detector.detector | ||
detector_data_condition_group = detector.workflow_condition_group | ||
if detector_data_condition_group is None: | ||
logger.error("workflow_condition_group does not exist", extra={"detector": detector}) | ||
return None | ||
|
||
# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is | ||
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW | ||
threshold_type = ( | ||
Condition.LESS_OR_EQUAL | ||
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value | ||
else Condition.GREATER_OR_EQUAL | ||
) | ||
|
||
if alert_rule.resolve_threshold is not None: | ||
resolve_threshold = alert_rule.resolve_threshold | ||
else: | ||
# figure out the resolve threshold ourselves | ||
resolve_threshold = get_resolve_threshold(detector_data_condition_group) | ||
if resolve_threshold == -1: | ||
# something went wrong | ||
return None | ||
|
||
detector_trigger = DataCondition.objects.create( | ||
comparison=resolve_threshold, | ||
condition_result=DetectorPriorityLevel.OK, | ||
type=threshold_type, | ||
condition_group=detector_data_condition_group, | ||
) | ||
Comment on lines
+223
to
+228
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💯 |
||
|
||
data_condition_group = DataConditionGroup.objects.create( | ||
organization_id=alert_rule.organization_id | ||
) | ||
|
@@ -135,22 +235,15 @@ def migrate_resolve_threshold_data_condition(alert_rule: AlertRule) -> DataCondi | |
condition_group=data_condition_group, | ||
workflow=alert_rule_workflow.workflow, | ||
) | ||
threshold_type = ( | ||
Condition.LESS | ||
if alert_rule.threshold_type == AlertRuleThresholdType.ABOVE.value | ||
else Condition.GREATER | ||
) | ||
# XXX: we set the resolve trigger's threshold_type to whatever the opposite of the rule's threshold_type is | ||
# e.g. if the rule has a critical trigger ABOVE some number, the resolve threshold is automatically set to BELOW | ||
|
||
data_condition = DataCondition.objects.create( | ||
comparison=alert_rule.resolve_threshold, | ||
condition_result=DetectorPriorityLevel.OK, | ||
type=threshold_type, | ||
comparison=DetectorPriorityLevel.OK, | ||
condition_result=True, | ||
type=Condition.ISSUE_PRIORITY_EQUALS, | ||
condition_group=data_condition_group, | ||
) | ||
# XXX: can't make an AlertRuleTriggerDataCondition since this isn't really a trigger | ||
return data_condition | ||
return detector_trigger, data_condition | ||
|
||
|
||
def create_metric_alert_lookup_tables( | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤔 it might be better for us to do the detector's data_condition migrations with the detector and the workflow's action filters in another. that's mostly just a nit to have this be a little clearer or help incase we have issues with the migration (like a workflow migrated correctly, but not a detector, we could then reuse the
migrate detector
method to just migrate the detector and all of it's associated conditions)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this could be difficult because of where the dual write methods are called in
logic.py
? @ceorourke you might have a better sense of whether this is possible.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah the serializer first calls
create_alert_rule
and then later callscreate_alert_rule_trigger
so we don't have any access to the trigger stuff at the rule creation step