diff --git a/metrics/alertmanager/ticdc.rules.yml b/metrics/alertmanager/ticdc.rules.yml
index 5290867501d..af3cd9481f8 100644
--- a/metrics/alertmanager/ticdc.rules.yml
+++ b/metrics/alertmanager/ticdc.rules.yml
@@ -13,6 +13,18 @@ groups:
       value: '{{ $value }}'
       summary: cdc cluster has multiple owners
 
+  - alert: cdc_no_owner
+    expr: sum(rate(ticdc_owner_ownership_counter[30s])) < 0.5
+    for: 10m
+    labels:
+      env: ENV_LABELS_ENV
+      level: warning
+      expr: sum(rate(ticdc_owner_ownership_counter[30s])) < 0.5
+    annotations:
+      description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}'
+      value: '{{ $value }}'
+      summary: cdc cluster has no owner for more than 10 minutes
+
   - alert: cdc_checkpoint_high_delay
     expr: ticdc_processor_checkpoint_ts_lag > 600
     for: 1m