forked from cloudposse/terraform-datadog-platform
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial list of amq interesting metrics * Adding cpu alert * Update cpu alert message * Auto Format * Initial list of amq interesting metrics * Adding cpu alert * Update cpu alert message * Best baseline monitors that apply broadly to most amq users * Best baseline monitors that apply broadly to most amq users * Fixing amq-heap-usage query to match alert threshold * Moving mq-current-connections-count to anomoly Co-authored-by: cloudpossebot <11232728+cloudpossebot@users.noreply.github.com>
- Loading branch information
1 parent
a9184c1
commit 705447c
Showing
1 changed file
with
174 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
# The official Datadog API documentation with available query parameters & alert types: | ||
# https://docs.datadoghq.com/api/v1/monitors/#create-a-monitor | ||
|
||
amq-cpu-utilization: | ||
name: "(AMQ) CPU Utilization above 90%" | ||
type: metric alert | ||
query: | | ||
avg(last_15m):avg:aws.amazonmq.cpu_utilization{*} by {broker} > 90 | ||
message: | | ||
{{#is_warning}} | ||
({broker}) - CPU Utilization above 85% | ||
{{/is_warning}} | ||
{{#is_alert}} | ||
({broker}) - CPU Utilization above 90% | ||
{{/is_alert}} | ||
escalation_message: "" | ||
tags: [ "ManagedBy:Terraform" ] | ||
notify_no_data: false | ||
notify_audit: true | ||
require_full_window: true | ||
enable_logs_sample: false | ||
force_delete: true | ||
include_tags: true | ||
locked: false | ||
renotify_interval: 60 | ||
timeout_h: 60 | ||
evaluation_delay: 60 | ||
new_host_delay: 300 | ||
no_data_timeframe: 10 | ||
threshold_windows: { } | ||
thresholds: | ||
critical: 90 | ||
warning: 85 | ||
#unknown: | ||
#ok: | ||
#critical_recovery: | ||
#warning_recovery: | ||
|
||
amq-heap-usage: | ||
name: "(AMQ) JVM heap usage above 95%" | ||
type: metric alert | ||
query: | | ||
avg(last_15m):avg:aws.amazonmq.heap_usage{*} by {broker} > 95 | ||
message: | | ||
{{#is_warning}} | ||
({broker}) - JVM heap usage above 90% | ||
{{/is_warning}} | ||
{{#is_alert}} | ||
({broker}) - JVM heap usage above 95% | ||
{{/is_alert}} | ||
escalation_message: "" | ||
tags: [ "ManagedBy:Terraform" ] | ||
notify_no_data: false | ||
notify_audit: true | ||
require_full_window: true | ||
enable_logs_sample: false | ||
force_delete: true | ||
include_tags: true | ||
locked: false | ||
renotify_interval: 60 | ||
timeout_h: 60 | ||
evaluation_delay: 60 | ||
new_host_delay: 300 | ||
no_data_timeframe: 10 | ||
threshold_windows: { } | ||
thresholds: | ||
critical: 95 | ||
warning: 90 | ||
#unknown: | ||
#ok: | ||
#critical_recovery: | ||
#warning_recovery: | ||
|
||
amq-network-in: | ||
name: "(AMQ) Anomaly of a large variance in network-in bytes" | ||
type: metric alert | ||
query: | | ||
avg(last_4h):anomalies(avg:aws.amazonmq.network_in{*} by {broker}, 'agile', 2, direction='both', alert_window='last_15m', interval=60, count_default_zero='true', seasonality='hourly') >= 1 | ||
message: | | ||
{{#is_alert}} | ||
({broker}) - Anomaly of a large variance in network-in bytes | ||
{{/is_alert}} | ||
escalation_message: "" | ||
tags: [ "ManagedBy:Terraform" ] | ||
notify_no_data: false | ||
notify_audit: true | ||
require_full_window: true | ||
enable_logs_sample: false | ||
force_delete: true | ||
include_tags: true | ||
locked: false | ||
renotify_interval: 60 | ||
timeout_h: 60 | ||
evaluation_delay: 900 | ||
new_host_delay: 300 | ||
no_data_timeframe: 10 | ||
threshold_windows: | ||
trigger_window: "last_15m" | ||
recovery_window: "last_15m" | ||
thresholds: | ||
critical: 1 | ||
#warning: | ||
#unknown: | ||
#ok: | ||
critical_recovery: 0 | ||
#warning_recovery: | ||
|
||
amq-network-out: | ||
name: "(AMQ) Anomaly of a large variance in network-out bytes" | ||
type: metric alert | ||
query: | | ||
avg(last_4h):anomalies(avg:aws.amazonmq.network_out{*} by {broker}, 'agile', 2, direction='both', alert_window='last_15m', interval=60, count_default_zero='true', seasonality='hourly') >= 1 | ||
message: | | ||
{{#is_alert}} | ||
({broker}) - Anomaly of a large variance in network-out bytes | ||
{{/is_alert}} | ||
escalation_message: "" | ||
tags: [ "ManagedBy:Terraform" ] | ||
notify_no_data: false | ||
notify_audit: true | ||
require_full_window: true | ||
enable_logs_sample: false | ||
force_delete: true | ||
include_tags: true | ||
locked: false | ||
renotify_interval: 60 | ||
timeout_h: 60 | ||
evaluation_delay: 900 | ||
new_host_delay: 300 | ||
no_data_timeframe: 10 | ||
threshold_windows: | ||
trigger_window: "last_15m" | ||
recovery_window: "last_15m" | ||
thresholds: | ||
critical: 1 | ||
#warning: | ||
#unknown: | ||
#ok: | ||
critical_recovery: 0 | ||
#warning_recovery: | ||
|
||
amq-current-connections-count: | ||
name: "(AMQ) Anomaly of a large variance in broker connections" | ||
type: metric alert | ||
query: | | ||
avg(last_4h):anomalies(avg:aws.amazonmq.current_connections_count{*}.as_count(), 'basic', 2, direction='both', alert_window='last_15m', interval=60, count_default_zero='true') >= 1 | ||
message: | | ||
{{#is_alert}} | ||
({broker}) - Anomaly of a large variance in broker connections | ||
{{/is_alert}} | ||
escalation_message: "" | ||
tags: [ "ManagedBy:Terraform" ] | ||
notify_no_data: false | ||
notify_audit: true | ||
require_full_window: true | ||
enable_logs_sample: false | ||
force_delete: true | ||
include_tags: true | ||
locked: false | ||
renotify_interval: 60 | ||
timeout_h: 60 | ||
evaluation_delay: 900 | ||
new_host_delay: 300 | ||
no_data_timeframe: 10 | ||
threshold_windows: | ||
trigger_window: "last_15m" | ||
recovery_window: "last_15m" | ||
thresholds: | ||
critical: 1 | ||
#warning: | ||
#unknown: | ||
#ok: | ||
critical_recovery: 0 | ||
#warning_recovery: |