Skip to content

Commit

Permalink
fix: add redis sentinel master config
Browse files Browse the repository at this point in the history
  • Loading branch information
opan committed Dec 12, 2023
1 parent 1bb5a53 commit 3b39007
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 62 deletions.
122 changes: 63 additions & 59 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,65 +343,69 @@ func NewRedisExporter(redisURI string, opts Options) (*Exporter, error) {
txt string
lbls []string
}{
"commands_duration_seconds_total": {txt: `Total amount of time in seconds spent per command`, lbls: []string{"cmd"}},
"commands_failed_calls_total": {txt: `Total number of errors prior command execution per command`, lbls: []string{"cmd"}},
"commands_rejected_calls_total": {txt: `Total number of errors within command execution per command`, lbls: []string{"cmd"}},
"commands_total": {txt: `Total number of calls per command`, lbls: []string{"cmd"}},
"latency_percentiles_usec": {txt: `A summary of latency percentile distribution per command`, lbls: []string{"cmd"}},
"config_key_value": {txt: `Config key and value`, lbls: []string{"key", "value"}},
"config_value": {txt: `Config key and value as metric`, lbls: []string{"key"}},
"connected_clients_details": {txt: "Details about connected clients", lbls: connectedClientsLabels},
"connected_slave_lag_seconds": {txt: "Lag of connected slave", lbls: []string{"slave_ip", "slave_port", "slave_state"}},
"connected_slave_offset_bytes": {txt: "Offset of connected slave", lbls: []string{"slave_ip", "slave_port", "slave_state"}},
"db_avg_ttl_seconds": {txt: "Avg TTL in seconds", lbls: []string{"db"}},
"db_keys": {txt: "Total number of keys by DB", lbls: []string{"db"}},
"db_keys_expiring": {txt: "Total number of expiring keys by DB", lbls: []string{"db"}},
"db_keys_cached": {txt: "Total number of cached keys by DB", lbls: []string{"db"}},
"errors_total": {txt: `Total number of errors per error type`, lbls: []string{"err"}},
"exporter_last_scrape_error": {txt: "The last scrape error status.", lbls: []string{"err"}},
"instance_info": {txt: "Information about the Redis instance", lbls: []string{"role", "redis_version", "redis_build_id", "redis_mode", "os", "maxmemory_policy", "tcp_port", "run_id", "process_id"}},
"key_group_count": {txt: `Count of keys in key group`, lbls: []string{"db", "key_group"}},
"key_group_memory_usage_bytes": {txt: `Total memory usage of key group in bytes`, lbls: []string{"db", "key_group"}},
"key_size": {txt: `The length or size of "key"`, lbls: []string{"db", "key"}},
"key_value": {txt: `The value of "key"`, lbls: []string{"db", "key"}},
"key_value_as_string": {txt: `The value of "key" as a string`, lbls: []string{"db", "key", "val"}},
"keys_count": {txt: `Count of keys`, lbls: []string{"db", "key"}},
"last_key_groups_scrape_duration_milliseconds": {txt: `Duration of the last key group metrics scrape in milliseconds`},
"last_slow_execution_duration_seconds": {txt: `The amount of time needed for last slow execution, in seconds`},
"latency_spike_duration_seconds": {txt: `Length of the last latency spike in seconds`, lbls: []string{"event_name"}},
"latency_spike_last": {txt: `When the latency spike last occurred`, lbls: []string{"event_name"}},
"master_last_io_seconds_ago": {txt: "Master last io seconds ago", lbls: []string{"master_host", "master_port"}},
"master_link_up": {txt: "Master link status on Redis slave", lbls: []string{"master_host", "master_port"}},
"master_sync_in_progress": {txt: "Master sync in progress", lbls: []string{"master_host", "master_port"}},
"number_of_distinct_key_groups": {txt: `Number of distinct key groups`, lbls: []string{"db"}},
"script_values": {txt: "Values returned by the collect script", lbls: []string{"key"}},
"sentinel_master_ok_sentinels": {txt: "The number of okay sentinels monitoring this master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_ok_slaves": {txt: "The number of okay slaves of the master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_sentinels": {txt: "The number of sentinels monitoring this master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_slaves": {txt: "The number of slaves of the master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_status": {txt: "Master status on Sentinel", lbls: []string{"master_name", "master_address", "master_status"}},
"sentinel_master_ckquorum_status": {txt: "Master ckquorum status", lbls: []string{"master_name", "message"}},
"sentinel_masters": {txt: "The number of masters this sentinel is watching"},
"sentinel_running_scripts": {txt: "Number of scripts in execution right now"},
"sentinel_scripts_queue_length": {txt: "Queue of user scripts to execute"},
"sentinel_simulate_failure_flags": {txt: "Failures simulations"},
"sentinel_tilt": {txt: "Sentinel is in TILT mode"},
"slave_info": {txt: "Information about the Redis slave", lbls: []string{"master_host", "master_port", "read_only"}},
"slave_repl_offset": {txt: "Slave replication offset", lbls: []string{"master_host", "master_port"}},
"slowlog_last_id": {txt: `Last id of slowlog`},
"slowlog_length": {txt: `Total slowlog`},
"start_time_seconds": {txt: "Start time of the Redis instance since unix epoch in seconds."},
"stream_group_consumer_idle_seconds": {txt: `Consumer idle time in seconds`, lbls: []string{"db", "stream", "group", "consumer"}},
"stream_group_consumer_messages_pending": {txt: `Pending number of messages for this specific consumer`, lbls: []string{"db", "stream", "group", "consumer"}},
"stream_group_consumers": {txt: `Consumers count of stream group`, lbls: []string{"db", "stream", "group"}},
"stream_group_last_delivered_id": {txt: `The epoch timestamp (ms) of the last delivered message`, lbls: []string{"db", "stream", "group"}},
"stream_group_messages_pending": {txt: `Pending number of messages in that stream group`, lbls: []string{"db", "stream", "group"}},
"stream_groups": {txt: `Groups count of stream`, lbls: []string{"db", "stream"}},
"stream_last_generated_id": {txt: `The epoch timestamp (ms) of the latest message on the stream`, lbls: []string{"db", "stream"}},
"stream_length": {txt: `The number of elements of the stream`, lbls: []string{"db", "stream"}},
"stream_radix_tree_keys": {txt: `Radix tree keys count"`, lbls: []string{"db", "stream"}},
"stream_radix_tree_nodes": {txt: `Radix tree nodes count`, lbls: []string{"db", "stream"}},
"up": {txt: "Information about the Redis instance"},
"commands_duration_seconds_total": {txt: `Total amount of time in seconds spent per command`, lbls: []string{"cmd"}},
"commands_failed_calls_total": {txt: `Total number of errors prior command execution per command`, lbls: []string{"cmd"}},
"commands_rejected_calls_total": {txt: `Total number of errors within command execution per command`, lbls: []string{"cmd"}},
"commands_total": {txt: `Total number of calls per command`, lbls: []string{"cmd"}},
"latency_percentiles_usec": {txt: `A summary of latency percentile distribution per command`, lbls: []string{"cmd"}},
"config_key_value": {txt: `Config key and value`, lbls: []string{"key", "value"}},
"config_value": {txt: `Config key and value as metric`, lbls: []string{"key"}},
"connected_clients_details": {txt: "Details about connected clients", lbls: connectedClientsLabels},
"connected_slave_lag_seconds": {txt: "Lag of connected slave", lbls: []string{"slave_ip", "slave_port", "slave_state"}},
"connected_slave_offset_bytes": {txt: "Offset of connected slave", lbls: []string{"slave_ip", "slave_port", "slave_state"}},
"db_avg_ttl_seconds": {txt: "Avg TTL in seconds", lbls: []string{"db"}},
"db_keys": {txt: "Total number of keys by DB", lbls: []string{"db"}},
"db_keys_expiring": {txt: "Total number of expiring keys by DB", lbls: []string{"db"}},
"db_keys_cached": {txt: "Total number of cached keys by DB", lbls: []string{"db"}},
"errors_total": {txt: `Total number of errors per error type`, lbls: []string{"err"}},
"exporter_last_scrape_error": {txt: "The last scrape error status.", lbls: []string{"err"}},
"instance_info": {txt: "Information about the Redis instance", lbls: []string{"role", "redis_version", "redis_build_id", "redis_mode", "os", "maxmemory_policy", "tcp_port", "run_id", "process_id"}},
"key_group_count": {txt: `Count of keys in key group`, lbls: []string{"db", "key_group"}},
"key_group_memory_usage_bytes": {txt: `Total memory usage of key group in bytes`, lbls: []string{"db", "key_group"}},
"key_size": {txt: `The length or size of "key"`, lbls: []string{"db", "key"}},
"key_value": {txt: `The value of "key"`, lbls: []string{"db", "key"}},
"key_value_as_string": {txt: `The value of "key" as a string`, lbls: []string{"db", "key", "val"}},
"keys_count": {txt: `Count of keys`, lbls: []string{"db", "key"}},
"last_key_groups_scrape_duration_milliseconds": {txt: `Duration of the last key group metrics scrape in milliseconds`},
"last_slow_execution_duration_seconds": {txt: `The amount of time needed for last slow execution, in seconds`},
"latency_spike_duration_seconds": {txt: `Length of the last latency spike in seconds`, lbls: []string{"event_name"}},
"latency_spike_last": {txt: `When the latency spike last occurred`, lbls: []string{"event_name"}},
"master_last_io_seconds_ago": {txt: "Master last io seconds ago", lbls: []string{"master_host", "master_port"}},
"master_link_up": {txt: "Master link status on Redis slave", lbls: []string{"master_host", "master_port"}},
"master_sync_in_progress": {txt: "Master sync in progress", lbls: []string{"master_host", "master_port"}},
"number_of_distinct_key_groups": {txt: `Number of distinct key groups`, lbls: []string{"db"}},
"script_values": {txt: "Values returned by the collect script", lbls: []string{"key"}},
"sentinel_master_ok_sentinels": {txt: "The number of okay sentinels monitoring this master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_ok_slaves": {txt: "The number of okay slaves of the master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_sentinels": {txt: "The number of sentinels monitoring this master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_slaves": {txt: "The number of slaves of the master", lbls: []string{"master_name", "master_address"}},
"sentinel_master_status": {txt: "Master status on Sentinel", lbls: []string{"master_name", "master_address", "master_status"}},
"sentinel_master_ckquorum_status": {txt: "Master ckquorum status", lbls: []string{"master_name", "message"}},
"sentinel_masters": {txt: "The number of masters this sentinel is watching"},
"sentinel_master_setting_ckquorum": {txt: "Show the current ckquorum config for each master"},
"sentinel_master_setting_failover_timeout": {txt: "Show the current failover-timeout config for each master"},
"sentinel_master_setting_parallel_syncs": {txt: "Show the current parallel-syncs config for each master"},
"sentinel_master_setting_down_after_milliseconds": {txt: "Show the current down-after-milliseconds config for each master"},
"sentinel_running_scripts": {txt: "Number of scripts in execution right now"},
"sentinel_scripts_queue_length": {txt: "Queue of user scripts to execute"},
"sentinel_simulate_failure_flags": {txt: "Failures simulations"},
"sentinel_tilt": {txt: "Sentinel is in TILT mode"},
"slave_info": {txt: "Information about the Redis slave", lbls: []string{"master_host", "master_port", "read_only"}},
"slave_repl_offset": {txt: "Slave replication offset", lbls: []string{"master_host", "master_port"}},
"slowlog_last_id": {txt: `Last id of slowlog`},
"slowlog_length": {txt: `Total slowlog`},
"start_time_seconds": {txt: "Start time of the Redis instance since unix epoch in seconds."},
"stream_group_consumer_idle_seconds": {txt: `Consumer idle time in seconds`, lbls: []string{"db", "stream", "group", "consumer"}},
"stream_group_consumer_messages_pending": {txt: `Pending number of messages for this specific consumer`, lbls: []string{"db", "stream", "group", "consumer"}},
"stream_group_consumers": {txt: `Consumers count of stream group`, lbls: []string{"db", "stream", "group"}},
"stream_group_last_delivered_id": {txt: `The epoch timestamp (ms) of the last delivered message`, lbls: []string{"db", "stream", "group"}},
"stream_group_messages_pending": {txt: `Pending number of messages in that stream group`, lbls: []string{"db", "stream", "group"}},
"stream_groups": {txt: `Groups count of stream`, lbls: []string{"db", "stream"}},
"stream_last_generated_id": {txt: `The epoch timestamp (ms) of the latest message on the stream`, lbls: []string{"db", "stream"}},
"stream_length": {txt: `The number of elements of the stream`, lbls: []string{"db", "stream"}},
"stream_radix_tree_keys": {txt: `Radix tree keys count"`, lbls: []string{"db", "stream"}},
"stream_radix_tree_nodes": {txt: `Radix tree nodes count`, lbls: []string{"db", "stream"}},
"up": {txt: "Information about the Redis instance"},
} {
e.metricDescriptions[k] = newMetricDescr(opts.Namespace, k, desc.txt, desc.lbls)
}
Expand Down
17 changes: 14 additions & 3 deletions exporter/sentinels.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ func (e *Exporter) extractSentinelMetrics(ch chan<- prometheus.Metric, c redis.C
}
e.registerConstMetricGauge(ch, "sentinel_master_ckquorum_status", float64(masterCkquorumStatus), masterName, masterCkquorumMsg)

masterCkquorum, _ := strconv.ParseFloat(masterDetailMap["ckquorum"], 64)
masterFailoverTimeout, _ := strconv.ParseFloat(masterDetailMap["failover-timeout"], 64)
masterParallelSyncs, _ := strconv.ParseFloat(masterDetailMap["parallel-syncs"], 64)
masterDownAfterMs, _ := strconv.ParseFloat(masterDetailMap["down-after-milliseconds"], 64)

e.registerConstMetricGauge(ch, "sentinel_master_setting_ckquorum", masterCkquorum, masterName, masterAddr)
e.registerConstMetricGauge(ch, "sentinel_master_setting_failover_timeout", masterFailoverTimeout, masterName, masterAddr)
e.registerConstMetricGauge(ch, "sentinel_master_setting_parallel_syncs", masterParallelSyncs, masterName, masterAddr)
e.registerConstMetricGauge(ch, "sentinel_master_setting_down_after_milliseconds", masterDownAfterMs, masterName, masterAddr)

sentinelDetails, _ := redis.Values(doRedisCmd(c, "SENTINEL", "SENTINELS", masterName))
log.Debugf("Sentinel details for master %s: %s", masterName, sentinelDetails)
e.processSentinelSentinels(ch, sentinelDetails, masterName, masterAddr)
Expand Down Expand Up @@ -137,9 +147,10 @@ func (e *Exporter) processSentinelSlaves(ch chan<- prometheus.Metric, slaveDetai
}

/*
valid examples:
master0:name=user03,status=sdown,address=192.169.2.52:6381,slaves=1,sentinels=5
master1:name=user02,status=ok,address=192.169.2.54:6380,slaves=1,sentinels=5
valid examples:
master0:name=user03,status=sdown,address=192.169.2.52:6381,slaves=1,sentinels=5
master1:name=user02,status=ok,address=192.169.2.54:6380,slaves=1,sentinels=5
*/
func parseSentinelMasterString(master string, masterInfo string) (masterName string, masterStatus string, masterAddr string, masterSlaves float64, masterSentinels float64, ok bool) {
ok = false
Expand Down

0 comments on commit 3b39007

Please sign in to comment.