diff --git a/redis_cloud/CHANGELOG.md b/redis_cloud/CHANGELOG.md index 7b047a077..d7d8c6664 100644 --- a/redis_cloud/CHANGELOG.md +++ b/redis_cloud/CHANGELOG.md @@ -1,8 +1,20 @@ # CHANGELOG - Redis Cloud +## 1.1.0 / 2024-11-04 + +***Changed***: + +* Removed redis_cloud.yaml file +* Replaced file descriptor panel with buffer memory panel in Redis Cloud Networking dashboard + +***Added***: + +* Added Proxy Dashboard +* Added Proxy-Threads Dashboard +* Added Active-Active Dashboard + ## 1.0.0 / 2024-09-26 ***Added***: * Initial Release - diff --git a/redis_cloud/README.md b/redis_cloud/README.md index 42f8bca8c..fc7347494 100644 --- a/redis_cloud/README.md +++ b/redis_cloud/README.md @@ -17,11 +17,11 @@ For a full list of supported metrics, see the **Metrics** section below. 1. Run the following command to install the Agent integration: - For the Datadog Agent v6: ```shell - datadog-agent integration install -t datadog-redis_cloud==1.0.0 + datadog-agent integration install -t datadog-redis_cloud==1.1.0 ``` - For the Datadog Agent v7: ```shell - agent integration install -t datadog-redis_cloud==1.0.0 + agent integration install -t datadog-redis_cloud==1.1.0 ``` 2. Configure the integration by setting `openmetrics_endpoint` to your cluster's master node. See [Getting Started with Integrations][4] for more information. 
diff --git a/redis_cloud/assets/dashboards/redis_cloud_active-active.json b/redis_cloud/assets/dashboards/redis_cloud_active-active.json new file mode 100644 index 000000000..f6b5ebc92 --- /dev/null +++ b/redis_cloud/assets/dashboards/redis_cloud_active-active.json @@ -0,0 +1,633 @@ +{ + "title": "Redis Cloud - Active-Active", + "description": "Dashboards specific to the replication of Conflict-free replicated data types", + "widgets": [ + { + "id": 1612047639653128, + "definition": { + "title": "Summary", + "background_color": "blue", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 2915118368107020, + "definition": { + "type": "note", + "content": "In Redis Cloud, Active-Active geo-distribution is based on CRDT technology. The Redis Cloud implementation of CRDT is called an Active-Active database (formerly known as CRDB). With Active-Active databases, applications can read and write to the same data set from different geographical locations seamlessly and with latency less than one millisecond (ms), without changing the way the application connects to the database.\n\nActive-Active databases also provide disaster recovery and accelerated data read-access for geographically distributed users.", + "background_color": "vivid_blue", + "font_size": "14", + "text_align": "left", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 4 + } + }, + { + "id": 2930533713442618, + "definition": { + "title": "Replication Status", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_status{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + 
"timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 6, + "y": 0, + "width": 2, + "height": 2 + } + }, + { + "id": 2755812436657366, + "definition": { + "title": "Replication Lag", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_crdt_peer_lag{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 0, + "width": 2, + "height": 2 + } + }, + { + "id": 8236114950898096, + "definition": { + "title": "Pending Garbage Collection", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_crdt_gc_collected{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 10, + "y": 0, + "width": 2, + "height": 2 + } + }, + { + "id": 6968331984484540, + "definition": { + "title": "Raw DB Size", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_crdt_raw_dbsize{role:master , $cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 6, + "y": 2, + "width": 2, + "height": 2 + } + }, + { + "id": 6989215750840428, + "definition": { + "title": "Replication Backlog", + "title_size": "16", + 
"title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_crdt_backlog_histlen{role:master , $cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 2, + "width": 2, + "height": 2 + } + }, + { + "id": 3111427893009204, + "definition": { + "title": "Pending Max Writes", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_egress_bytes_max{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 10, + "y": 2, + "width": 2, + "height": 2 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 5 + } + }, + { + "id": 2721361520379598, + "definition": { + "title": "Lag", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 300596519375860, + "definition": { + "title": "Synchronization Lag", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_local_ingress_lag_time{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": 
"solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 4 + } + }, + { + "id": 7680670658379236, + "definition": { + "title": "Merge Requests", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_crdt_merge_reqs{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 4 + } + } + ] + }, + "layout": { + "x": 0, + "y": 5, + "width": 12, + "height": 5 + } + }, + { + "id": 3129203195092272, + "definition": { + "title": "Ingress", + "background_color": "green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7889024152378106, + "definition": { + "title": "Ingress", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_ingress_bytes{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 4 + } + }, + { + "id": 445842004133046, + "definition": { + "title": 
"Ingress (de-compressed)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_ingress_bytes_decompressed{crdt_replica_id:2, $cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 4 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 5, + "is_column_break": true + } + }, + { + "id": 8072238870944916, + "definition": { + "title": "Egress", + "background_color": "vivid_pink", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 4267779925837744, + "definition": { + "title": "Egress", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_egress_bytes{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 4 + } + }, + { + "id": 108369510108736, + "definition": { + "title": "Egress (de-compressed)", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + 
"legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.bdb_crdt_syncer_egress_bytes_decompressed{crdt_replica_id:2 , $cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 4 + } + } + ] + }, + "layout": { + "x": 0, + "y": 5, + "width": 12, + "height": 5 + } + } + ], + "template_variables": [ + { + "name": "cluster", + "prefix": "cluster", + "available_values": [], + "default": "*" + } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" +} \ No newline at end of file diff --git a/redis_cloud/assets/dashboards/redis_cloud_networking.json b/redis_cloud/assets/dashboards/redis_cloud_networking.json index 794f1e815..cc9a10ad0 100644 --- a/redis_cloud/assets/dashboards/redis_cloud_networking.json +++ b/redis_cloud/assets/dashboards/redis_cloud_networking.json @@ -1,6 +1,6 @@ { "title": "Redis Cloud - Networking", - "description": "Redis Cloud Nodes are the physical hardware that hosts database partitions", + "description": "Metrics specific to network performance in Redis clusters", "widgets": [ { "id": 8148371123349408, @@ -141,9 +141,10 @@ { "id": 2062451612637888, "definition": { - "title": "File Descriptors", + "title": "Buffer Memory", "title_size": "16", "title_align": "left", + "time": {}, "type": "query_value", "requests": [ { @@ -152,7 +153,7 @@ { "data_source": "metrics", "name": "query1", - "query": "avg:rdsc.dmcproxy_process_open_fds{$cluster}", + "query": "avg:rdsc.redis_mem_clients_normal{$cluster}", "aggregator": "sum" } ], diff --git 
a/redis_cloud/assets/dashboards/redis_cloud_proxy-threads.json b/redis_cloud/assets/dashboards/redis_cloud_proxy-threads.json new file mode 100644 index 000000000..50eccce53 --- /dev/null +++ b/redis_cloud/assets/dashboards/redis_cloud_proxy-threads.json @@ -0,0 +1,528 @@ +{ + "title": "Redis Cloud - Proxy Threads", + "description": "Redis Cluster Proxy CPU details", + "widgets": [ + { + "id": 4181575023845814, + "definition": { + "title": "Proxy Threads Overview", + "background_color": "vivid_blue", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7104524980730882, + "definition": { + "type": "note", + "content": "Redis Cloud allows multiple databases to be created. Each database gets an endpoint (a unique URL and port on the FQDN). This endpoint receives all the traffic for all operations for that database. By default, Redis Software binds this database endpoint to one of the proxies on a single node in the cluster. This proxy becomes an active proxy and receives all the operations for the given database. 
(Note that if the node with the active proxy fails, a new proxy on another node takes over as part of the failover process automatically.)", + "background_color": "white", + "font_size": "14", + "text_align": "left", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 2 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + }, + { + "id": 2976279033317650, + "definition": { + "title": "Proxy Threads - Workers", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 2418700679517252, + "definition": { + "title": "Workers", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "system", + "formula": "query1" + }, + { + "alias": "user", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:worker.*,mode:system} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:worker.*,mode:user} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 3, + "width": 12, + "height": 4 + } + }, + { + "id": 7717974699945770, + "definition": { + "title": "Proxy Threads - Listeners", + "background_color": "vivid_yellow", + "show_title": true, + "type": 
"group", + "layout_type": "ordered", + "widgets": [ + { + "id": 1205350136707792, + "definition": { + "title": "Listeners", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "system", + "formula": "query1" + }, + { + "alias": "user", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:listener.*,mode:system} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:listener.*,mode:user} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 7, + "width": 12, + "height": 4 + } + }, + { + "id": 5330583329708778, + "definition": { + "title": "Proxy File Descriptors", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5368842331723112, + "definition": { + "title": "Open FDs", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_open_fds{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 0, + "y": 0, + "width": 4, + "height": 2 + } + }, + 
{ + "id": 2048427695631512, + "definition": { + "title": "Max FDs", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_max_fds{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 4, + "y": 0, + "width": 4, + "height": 2 + } + }, + { + "id": 5133761891470680, + "definition": { + "title": "Resident Memory", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "response_format": "scalar", + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_resident_memory_bytes{$cluster}", + "aggregator": "avg" + } + ], + "formulas": [ + { + "formula": "query1" + } + ] + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 8, + "y": 0, + "width": 4, + "height": 2 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3, + "is_column_break": true + } + }, + { + "id": 1107644513966124, + "definition": { + "title": "Proxy Threads - Core", + "background_color": "vivid_orange", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 4493462146648202, + "definition": { + "title": "Core", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "horizontal", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "{{threadname}}", + "formula": "query1" + }, + { + "formula": "query2" + }, + { + "formula": "query3" + }, + { + "formula": "query4" + }, + { + "formula": "query5" + }, + { + "formula": "query6" + } + ], + 
"queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:anonymous*} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:mgmt*} by {node}" + }, + { + "data_source": "metrics", + "name": "query3", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:directory*} by {node}" + }, + { + "data_source": "metrics", + "name": "query4", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:offloader*} by {node}" + }, + { + "data_source": "metrics", + "name": "query5", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:dmcproxy*} by {node}" + }, + { + "data_source": "metrics", + "name": "query6", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:logger*} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 3, + "width": 12, + "height": 4 + } + }, + { + "id": 4319695888231230, + "definition": { + "title": "Proxy Threads - CLI Session", + "background_color": "vivid_pink", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 4608051052982734, + "definition": { + "title": "CLI Session", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "system", + "formula": "query1" + }, + { + "alias": 
"user", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:cli_session.*,mode:system} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:cli_session.*,mode:user} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 7, + "width": 12, + "height": 4 + } + } + ], + "template_variables": [ + { + "name": "cluster", + "prefix": "cluster", + "available_values": [], + "default": "*" + } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" +} \ No newline at end of file diff --git a/redis_cloud/assets/dashboards/redis_cloud_proxy.json b/redis_cloud/assets/dashboards/redis_cloud_proxy.json new file mode 100644 index 000000000..60c57043b --- /dev/null +++ b/redis_cloud/assets/dashboards/redis_cloud_proxy.json @@ -0,0 +1,889 @@ +{ + "title": "Redis Cloud - Proxy", + "description": "Redis Cluster Proxy operational details", + "widgets": [ + { + "id": 4181575023845814, + "definition": { + "title": "Proxy Overview", + "background_color": "vivid_blue", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 7104524980730882, + "definition": { + "type": "note", + "content": "Redis Cloud Proxy is an entity with negligible latency that mediates between applications and the database. It exposes the database endpoint to database clients while masking behind-the-scenes activities that the Redis Cloud cluster performs. 
This allows developers to focus on how an application is using the data, instead of worrying about frequent changes in database topology.\n\nThe proxy employs a multi-threaded architecture. It can easily scale up by using more available cores. It is designed to cope with high traffic by using multiplexing and pipelining. When thousands of clients are connected to Redis Cloud simultaneously, the proxy consolidates all of the incoming requests into a set of inner pipelines and distributes them to the relevant database shard. The net result is that requests are processed much faster, allowing high throughput with low latency.", + "background_color": "white", + "font_size": "14", + "text_align": "left", + "vertical_align": "top", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 4, + "height": 3 + } + }, + { + "id": 4369424159881430, + "definition": { + "title": "Proxy Memory Usage", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.dmcproxy_process_resident_memory_bytes{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 4, + "y": 0, + "width": 4, + "height": 3 + } + }, + { + "id": 3018908171808098, + "definition": { + "title": "Proxy Client Connections", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + 
"formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_conns{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 8, + "y": 0, + "width": 4, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 4 + } + }, + { + "id": 2976279033317650, + "definition": { + "title": "Network", + "background_color": "vivid_green", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5129890666020128, + "definition": { + "title": "Endpoints", + "title_size": "16", + "title_align": "left", + "type": "query_value", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_conns{$cluster}", + "aggregator": "last" + } + ], + "response_format": "scalar" + } + ], + "autoscale": true, + "precision": 2, + "timeseries_background": { + "type": "area" + } + }, + "layout": { + "x": 0, + "y": 0, + "width": 2, + "height": 3 + } + }, + { + "id": 2418700679517252, + "definition": { + "title": "Endpoints Total Ingress", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "listener ingress", + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_ingress_bytes{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + 
"line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 2, + "y": 0, + "width": 5, + "height": 3 + } + }, + { + "id": 1368636092995330, + "definition": { + "title": "Ingress/Egress by Node", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "ingress", + "formula": "query1" + }, + { + "alias": "egress", + "formula": "query2 * -1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.node_ingress_bytes{$cluster} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.node_egress_bytes{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 7, + "y": 0, + "width": 5, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 4, + "width": 12, + "height": 4 + } + }, + { + "id": 7717974699945770, + "definition": { + "title": "CPU/Threads", + "background_color": "vivid_yellow", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 1205350136707792, + "definition": { + "title": "Thread Utilization", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.namedprocess_namegroup_thread_cpu_seconds_total{$cluster} by {threadname}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": 
"values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 6, + "height": 3 + } + }, + { + "id": 1641628053179844, + "definition": { + "title": "CPU Utilization", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.dmcproxy_process_cpu_usage_percent{$cluster , $node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 8, + "width": 12, + "height": 4 + } + }, + { + "id": 5217564291542670, + "definition": { + "title": "Total Operations", + "background_color": "vivid_pink", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 6812333349868960, + "definition": { + "title": "Requests", + "title_size": "16", + "title_align": "left", + "show_legend": false, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_total_req{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + 
"width": 6, + "height": 3 + } + }, + { + "id": 7356449359833222, + "definition": { + "title": "Responses", + "title_size": "16", + "title_align": "left", + "show_legend": false, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_total_res{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 12, + "width": 12, + "height": 4 + } + }, + { + "id": 5330583329708778, + "definition": { + "title": "Read/Write Requests/Responses", + "background_color": "vivid_purple", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5368842331723112, + "definition": { + "title": "Read", + "title_size": "16", + "title_align": "left", + "show_legend": false, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "requests", + "formula": "query1" + }, + { + "alias": "responses", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_read_req{$cluster} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.listener_read_res{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + 
"x": 0, + "y": 0, + "width": 6, + "height": 3 + } + }, + { + "id": 2048427695631512, + "definition": { + "title": "Write", + "title_size": "16", + "title_align": "left", + "show_legend": false, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "requests", + "formula": "query1" + }, + { + "alias": "responses", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_write_req{$cluster} by {node}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdse.listener_write_res{$cluster} by {node}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 0, + "width": 6, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 16, + "width": 12, + "height": 4, + "is_column_break": true + } + }, + { + "id": 1107644513966124, + "definition": { + "title": "Commands", + "background_color": "vivid_orange", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 4493462146648202, + "definition": { + "title": "Get/Set", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "GET", + "formula": "query1" + }, + { + "alias": "GET", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdse.listener_cmd_get{$cluster}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdsc.listener_cmd_set{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + 
"palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 0, + "width": 4, + "height": 3 + } + }, + { + "id": 2699327014422812, + "definition": { + "title": "Flush/Touch", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "horizontal", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "Flush", + "formula": "query1" + }, + { + "alias": "Touch", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdsc.listener_cmd_flush{$cluster}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdsc.listener_cmd_touch{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 4, + "y": 0, + "width": 4, + "height": 3 + } + }, + { + "id": 5763295485850238, + "definition": { + "title": "Authentication/Error", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "horizontal", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "alias": "Authentication", + "formula": "query1" + }, + { + "alias": "Errors", + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:rdsc.listener_auth_cmds{$cluster}" + }, + { + "data_source": "metrics", + "name": "query2", + "query": "avg:rdsc.listener_auth_errors{$cluster}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "order_by": "values", + "color_order": "shuffled", + 
"line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 8, + "y": 0, + "width": 4, + "height": 3 + } + } + ] + }, + "layout": { + "x": 0, + "y": 20, + "width": 12, + "height": 4 + } + } + ], + "template_variables": [ + { + "name": "cluster", + "prefix": "cluster", + "available_values": [], + "default": "*" + }, + { + "name": "node", + "prefix": "node", + "available_values": [], + "default": "*" + } + ], + "layout_type": "ordered", + "notify_list": [], + "reflow_type": "fixed" +} \ No newline at end of file diff --git a/redis_cloud/datadog_checks/redis_cloud/__about__.py b/redis_cloud/datadog_checks/redis_cloud/__about__.py index 1f356cc57..1a72d32e5 100644 --- a/redis_cloud/datadog_checks/redis_cloud/__about__.py +++ b/redis_cloud/datadog_checks/redis_cloud/__about__.py @@ -1 +1 @@ -__version__ = '1.0.0' +__version__ = '1.1.0' diff --git a/redis_cloud/datadog_checks/redis_cloud/data/redis_cloud.yaml b/redis_cloud/datadog_checks/redis_cloud/data/redis_cloud.yaml deleted file mode 100644 index 5ffb9725d..000000000 --- a/redis_cloud/datadog_checks/redis_cloud/data/redis_cloud.yaml +++ /dev/null @@ -1,163 +0,0 @@ -init_config: - min_collection_interval: 15 - tls_verify: false - -instances: -- openmetrics_endpoint: https://172.27.1.4:8070/metrics - namespace: "rdsc" - metrics: - - bdb_avg_read_latency_max - - bdb_avg_write_latency_max - - bdb_conns - - bdb_egress_bytes - - bdb_ingress_bytes - - bdb_memory_limit - - bdb_total_req - - bdb_up - - bdb_used_memory - - bdb_avg_latency - - bdb_no_of_keys - - bdb_avg_latency_max - - bdb_avg_read_latency - - bdb_avg_write_latency - - bdb_egress_bytes_max - - bdb_evicted_objects - - bdb_evicted_objects_max - - bdb_expired_objects - - bdb_expired_objects_max - - bdb_fork_cpu_system - - bdb_fork_cpu_system_max - - bdb_fork_cpu_user - - bdb_fork_cpu_user_max - - bdb_ingress_bytes_max - - bdb_instantaneous_ops_per_sec - - bdb_main_thread_cpu_system - - 
bdb_main_thread_cpu_system_max - - bdb_main_thread_cpu_user - - bdb_main_thread_cpu_user_max - - bdb_mem_frag_ratio - - bdb_mem_size_lua - - bdb_monitor_sessions_count - - bdb_other_req - - bdb_other_req_max - - bdb_other_res - - bdb_other_res_max - - bdb_pubsub_channels - - bdb_pubsub_channels_max - - bdb_pubsub_patterns - - bdb_pubsub_patterns_max - - bdb_read_hits - - bdb_read_hits_max - - bdb_read_misses - - bdb_read_misses_max - - bdb_read_req - - bdb_read_req_max - - bdb_read_res - - bdb_read_res_max - - bdb_shard_cpu_system - - bdb_shard_cpu_system_max - - bdb_shard_cpu_user - - bdb_shard_cpu_user_max - - bdb_total_connections_received - - bdb_total_connections_received_max - - bdb_total_req_max - - bdb_total_res - - bdb_total_res_max - - bdb_write_hits - - bdb_write_hits_max - - bdb_write_misses - - bdb_write_misses_max - - bdb_write_req - - bdb_write_req_max - - bdb_write_res - - bdb_write_res_max - - bdb_crdt_syncer_ingress_bytes - - bdb_crdt_syncer_ingress_bytes_decompressed - - bdb_crdt_syncer_local_ingress_lag_time - - bdb_crdt_syncer_status - - bdb_replicaof_syncer_ingress_bytes - - bdb_replicaof_syncer_ingress_bytes_decompressed - - bdb_replicaof_syncer_local_ingress_lag_time - - bdb_replicaof_syncer_status - - bdb_crdt_syncer_egress_bytes - - bdb_crdt_syncer_egress_bytes_decompressed - - bdb_crdt_syncer_egress_bytes_decompressed_max - - bdb_crdt_syncer_egress_bytes_max - - - listener_acc_latency_total - - listener_acc_latency_max_total - - listener_acc_other_latency_total - - listener_acc_other_latency_max_total - - listener_acc_read_latency_total - - listener_acc_read_latency_max_total - - listener_acc_write_latency_total - - listener_acc_write_latency_max_total - - listener_auth_cmds_total - - listener_auth_cmds_max_total - - listener_auth_errors_total - - listener_auth_errors_max_total - - listener_cmd_flush_total - - listener_cmd_flush_max_total - - listener_cmd_get_total - - listener_cmd_get_max_total - - listener_cmd_set_total - - 
listener_cmd_set_max_total - - listener_cmd_touch_total - - listener_cmd_touch_max_total - - listener_conns_total - - listener_egress_bytes_total - - listener_egress_bytes_max_total - - listener_ingress_bytes_total - - listener_ingress_bytes_max_total - - listener_last_req_time_total - - listener_last_res_time_total - - listener_max_connections_exceeded_total - - listener_max_connections_exceeded_max_total - - listener_monitor_sessions_count_total - - listener_other_req_total - - listener_other_req_max_total - - listener_other_res_total - - listener_other_res_max_total - - listener_other_started_res_total - - listener_other_started_res_max_total - - listener_read_req_total - - listener_read_req_max_total - - listener_read_res_total - - listener_read_res_max_total - - listener_read_started_res_total - - listener_read_started_res_max_total - - listener_resp2_clients_total - - listener_resp2_clients_max_total - - listener_resp3_clients_total - - listener_resp3_clients_max_total - - listener_sconn_hello_failed_total - - listener_sconn_hello_failed_max_total - - listener_sconn_hello_setresp_total - - listener_sconn_hello_setresp_max_total - - listener_sconn_hello_setuser_total - - listener_sconn_hello_setuser_max_total - - listener_total_connections_received_total - - listener_total_connections_received_max_total - - listener_total_req_total - - listener_total_req_max_total - - listener_total_res_total - - listener_total_res_max_total - - listener_total_started_res_total - - listener_total_started_res_max_total - - listener_write_req_total - - listener_write_req_max_total - - listener_write_res_total - - listener_write_res_max_total - - listener_write_started_res_total - - listener_write_started_res_max_total - - - dmcproxy_process_cpu_system_seconds_total - - dmcproxy_process_cpu_usage_percent - - dmcproxy_process_cpu_user_seconds_total - - dmcproxy_process_main_thread_cpu_system_seconds_total - - dmcproxy_process_main_thread_cpu_user_seconds_total - - 
dmcproxy_process_max_fds - - dmcproxy_process_open_fds - - dmcproxy_process_resident_memory_bytes - - dmcproxy_process_start_time_seconds - - dmcproxy_process_virtual_memory_bytes diff --git a/redis_cloud/manifest.json b/redis_cloud/manifest.json index 60f8d036c..eed3b405a 100644 --- a/redis_cloud/manifest.json +++ b/redis_cloud/manifest.json @@ -62,7 +62,10 @@ "dashboards": { "redis-cloud-overview": "assets/dashboards/redis_cloud_overview.json", "redis-cloud-database": "assets/dashboards/redis_cloud_database.json", - "redis-cloud-networking": "assets/dashboards/redis_cloud_networking.json" + "redis-cloud-networking": "assets/dashboards/redis_cloud_networking.json", + "redis-cloud-active-active": "assets/dashboards/redis_cloud_active-active.json", + "redis-cloud-proxy": "assets/dashboards/redis_cloud_proxy.json", + "redis-cloud-proxy-threads": "assets/dashboards/redis_cloud_proxy-threads.json" }, "logs": {} },