Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added latency_percentiles_usec summary (A summary of latency percentile distribution per command) #652

Merged
merged 5 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ services:
ports:
- 6379

- name: redis7
  image: redis:7
  pull: if-not-exists
  commands:
    # redis-server listens on 6379 unless told otherwise; bind it to the
    # port exposed below so that redis://redis7:6384 is reachable.
    - "redis-server --port 6384 --protected-mode no --dbfilename dump7.rdb"
  ports:
    - 6384

- name: pwd-redis5
image: redis:5
pull: if-not-exists
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ test:
TEST_REDIS_URI="redis://redis6:6379" \
TEST_REDIS5_URI="redis://redis5:6383" \
TEST_REDIS6_URI="redis://redis6:6379" \
TEST_REDIS7_URI="redis://redis7:6384" \
TEST_REDIS_2_8_URI="redis://redis-2-8:6381" \
TEST_KEYDB01_URI="redis://keydb-01:6401" \
TEST_KEYDB02_URI="redis://keydb-02:6402" \
Expand Down
6 changes: 6 additions & 0 deletions contrib/docker-compose-for-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ services:
ports:
- "6379"

redis7:
  image: redis:7.0
  # redis-server listens on 6379 unless told otherwise; bind it to the
  # port published below so that redis://redis7:6384 is reachable.
  command: "redis-server --port 6384 --protected-mode no --dbfilename dump7.rdb"
  ports:
    - "6384"

pwd-redis5:
image: redis:5
command: "redis-server --port 6380 --requirepass redis-password --dbfilename dump5-pwd.rdb"
Expand Down
1 change: 1 addition & 0 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ func NewRedisExporter(redisURI string, opts Options) (*Exporter, error) {
"commands_failed_calls_total": {txt: `Total number of errors prior command execution per command`, lbls: []string{"cmd"}},
"commands_rejected_calls_total": {txt: `Total number of errors within command execution per command`, lbls: []string{"cmd"}},
"commands_total": {txt: `Total number of calls per command`, lbls: []string{"cmd"}},
"latency_percentiles_usec": {txt: `A summary of latency percentile distribution per command`, lbls: []string{"cmd"}},
"config_key_value": {txt: `Config key and value`, lbls: []string{"key", "value"}},
"config_value": {txt: `Config key and value as metric`, lbls: []string{"key"}},
"connected_clients_details": {txt: "Details about connected clients", lbls: connectedClientsLabels},
Expand Down
102 changes: 99 additions & 3 deletions exporter/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,25 @@ func extractVal(s string) (val float64, err error) {
return
}

// extractPercentileVal parses a single "<prefix><percentile>=<value>" token of
// a Latencystats line, e.g. "p99.9=4.015" yields (99.9, 4.015, nil).
//
// The first byte of the key (the "p") is dropped before the percentile is
// parsed. A token that does not contain exactly one "=" is not treated as an
// error: all results are left at their zero values. A token with an empty key
// (such as "=1.5") yields a parse error rather than a slice-bounds panic.
func extractPercentileVal(s string) (percentile float64, val float64, err error) {
	split := strings.Split(s, "=")
	if len(split) != 2 {
		return
	}

	key := split[0]
	if key != "" {
		// Strip the one-byte prefix; only safe when the key is non-empty.
		key = key[1:]
	}
	// An empty key falls through to ParseFloat(""), which reports a syntax error.
	percentile, err = strconv.ParseFloat(key, 64)
	if err != nil {
		return
	}
	val, err = strconv.ParseFloat(split[1], 64)
	return
}

func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string, dbCount int) {
keyValues := map[string]string{}
handledDBs := map[string]bool{}
cmdCount := map[string]uint64{}
cmdSum := map[string]float64{}
cmdLatencyMap := map[string]map[float64]float64{}

fieldClass := ""
lines := strings.Split(info, "\n")
Expand Down Expand Up @@ -70,7 +86,13 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
e.handleMetricsServer(ch, fieldKey, fieldValue)

case "Commandstats":
e.handleMetricsCommandStats(ch, fieldKey, fieldValue)
cmd, calls, usecsTotal := e.handleMetricsCommandStats(ch, fieldKey, fieldValue)
cmdCount[cmd] = uint64(calls)
cmdSum[cmd] = usecsTotal
continue

case "Latencystats":
e.handleMetricsLatencyStats(fieldKey, fieldValue, cmdLatencyMap)
continue

case "Errorstats":
Expand Down Expand Up @@ -102,6 +124,10 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
e.parseAndRegisterConstMetric(ch, fieldKey, fieldValue)
}

// To be able to generate the latency summaries we need the count and sum that we get
// from #Commandstats processing and the percentile info that we get from the #Latencystats processing
e.generateCommandLatencySummaries(ch, cmdLatencyMap, cmdCount, cmdSum)

for dbIndex := 0; dbIndex < dbCount; dbIndex++ {
dbName := "db" + strconv.Itoa(dbIndex)
if _, exists := handledDBs[dbName]; !exists {
Expand All @@ -128,6 +154,16 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
}
}

// generateCommandLatencySummaries registers one "latency_percentiles_usec"
// summary for every command present in cmdLatencyMap. A command is skipped
// unless it also has an entry in both cmdCount and cmdSum, because the summary
// is built from the count, the sum and the percentile map together.
func (e *Exporter) generateCommandLatencySummaries(ch chan<- prometheus.Metric, cmdLatencyMap map[string]map[float64]float64, cmdCount map[string]uint64, cmdSum map[string]float64) {
	for name, percentiles := range cmdLatencyMap {
		calls, haveCalls := cmdCount[name]
		if !haveCalls {
			continue
		}
		total, haveTotal := cmdSum[name]
		if !haveTotal {
			continue
		}
		e.registerConstSummary(ch, "latency_percentiles_usec", []string{"cmd"}, calls, total, percentiles, name)
	}
}

func (e *Exporter) extractClusterInfoMetrics(ch chan<- prometheus.Metric, info string) {
lines := strings.Split(info, "\r\n")

Expand Down Expand Up @@ -347,6 +383,57 @@ func parseMetricsCommandStats(fieldKey string, fieldValue string) (cmd string, c
return
}

// parseMetricsLatencyStats parses one key/value pair of the "# Latencystats"
// INFO section.
//
// fieldKey must start with "latency_percentiles_usec_"; the remainder is
// returned as cmd. fieldValue is a comma-separated list of "p<N>=<usec>"
// tokens, each of which becomes one percentile -> value entry in
// percentileMap.
//
// An error is returned when the key prefix is wrong or when a token fails to
// parse; in the latter case cmd and the entries parsed so far are still
// returned. Tokens that do not contain exactly one "=" (including an empty
// fieldValue) are not errors: extractPercentileVal reports them as zero
// values, so they add a 0 -> 0 entry.
func parseMetricsLatencyStats(fieldKey string, fieldValue string) (cmd string, percentileMap map[float64]float64, errorOut error) {
	/*
		# Latencystats
		latency_percentiles_usec_rpop:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_zadd:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_hset:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_set:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_lrange:p50=17.023,p99=21.119,p99.9=27.007
		latency_percentiles_usec_get:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_config|resetstat:p50=280.575,p99=280.575,p99.9=280.575
		latency_percentiles_usec_config|get:p50=8.031,p99=27.007,p99.9=27.007
		latency_percentiles_usec_ping:p50=0.001,p99=1.003,p99.9=1.003

		broken up like this:
			fieldKey  = latency_percentiles_usec_ping
			fieldValue= p50=0.001,p99=1.003,p99.9=3.007
	*/

	const cmdPrefix = "latency_percentiles_usec_"
	percentileMap = map[float64]float64{}

	if !strings.HasPrefix(fieldKey, cmdPrefix) {
		errorOut = errors.New("Invalid fieldKey")
		return
	}
	cmd = strings.TrimPrefix(fieldKey, cmdPrefix)

	// strings.Split with a non-empty separator always returns at least one
	// element, so there is no empty-slice case to guard against here.
	for pos, kv := range strings.Split(fieldValue, ",") {
		percentile, value, err := extractPercentileVal(kv)
		if err != nil {
			errorOut = fmt.Errorf("Invalid splitValue[%d]", pos)
			return
		}
		percentileMap[percentile] = value
	}
	return
}

func parseMetricsErrorStats(fieldKey string, fieldValue string) (errorType string, count float64, errorOut error) {
/*
Format:
Expand All @@ -373,15 +460,24 @@ func parseMetricsErrorStats(fieldKey string, fieldValue string) (errorType strin
return
}

func (e *Exporter) handleMetricsCommandStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) {
if cmd, calls, rejectedCalls, failedCalls, usecTotal, extendedStats, err := parseMetricsCommandStats(fieldKey, fieldValue); err == nil {
// handleMetricsCommandStats parses one Commandstats key/value pair and, when
// parsing succeeds, registers the per-command counters: "commands_total",
// "commands_duration_seconds_total" (usec total divided by 1e6) and, if
// extended stats were reported, the rejected/failed call counters.
//
// It returns the command name, call count and usec total exactly as produced
// by parseMetricsCommandStats, whether or not parsing succeeded.
func (e *Exporter) handleMetricsCommandStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) (cmd string, calls float64, usecTotal float64) {
	cmd, calls, rejected, failed, usecTotal, extended, err := parseMetricsCommandStats(fieldKey, fieldValue)
	if err != nil {
		// Nothing is registered for a line that could not be parsed.
		return
	}

	e.registerConstMetric(ch, "commands_total", calls, prometheus.CounterValue, cmd)
	e.registerConstMetric(ch, "commands_duration_seconds_total", usecTotal/1e6, prometheus.CounterValue, cmd)
	if !extended {
		return
	}

	e.registerConstMetric(ch, "commands_rejected_calls_total", rejected, prometheus.CounterValue, cmd)
	e.registerConstMetric(ch, "commands_failed_calls_total", failed, prometheus.CounterValue, cmd)
	return
}

// handleMetricsLatencyStats parses one Latencystats key/value pair and stores
// the resulting percentile map in cmdLatencyMap under the command name.
// Lines that fail to parse are ignored and leave cmdLatencyMap untouched.
func (e *Exporter) handleMetricsLatencyStats(fieldKey string, fieldValue string, cmdLatencyMap map[string]map[float64]float64) {
	name, percentiles, parseErr := parseMetricsLatencyStats(fieldKey, fieldValue)
	if parseErr != nil {
		return
	}
	cmdLatencyMap[name] = percentiles
}

func (e *Exporter) handleMetricsErrorStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) {
Expand Down
78 changes: 78 additions & 0 deletions exporter/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"net/http/httptest"
"os"
"reflect"
"regexp"
"strings"
"testing"
Expand Down Expand Up @@ -103,6 +104,18 @@ func TestCommandStats(t *testing.T) {
deleteKeysFromDB(t, redisSixTwoAddr)
}

// TestLatencyStats runs the command-stats check for the
// "redis_latency_percentiles_usec" metric against the instance named by
// TEST_REDIS7_URI, creating test keys beforehand and deleting them afterwards.
func TestLatencyStats(t *testing.T) {
	// Since Redis v7 we should have extended latency stats (summary of command latencies)
	addr := os.Getenv("TEST_REDIS7_URI")

	exp := getTestExporterWithAddr(addr)
	setupDBKeys(t, addr)

	expected := map[string]bool{
		"redis_latency_percentiles_usec": false,
	}
	commandStatsCheck(t, exp, expected)

	deleteKeysFromDB(t, addr)
}

func commandStatsCheck(t *testing.T, e *Exporter, want map[string]bool) {
chM := make(chan prometheus.Metric)
go func() {
Expand Down Expand Up @@ -373,3 +386,68 @@ func TestParseErrorStats(t *testing.T) {
}

}

// Test_parseMetricsLatencyStats is a table-driven test of
// parseMetricsLatencyStats covering well-formed values, an empty value, an
// unparsable percentile value and a key without the expected prefix.
func Test_parseMetricsLatencyStats(t *testing.T) {
	const pingKey = "latency_percentiles_usec_ping"

	cases := []struct {
		name       string
		fieldKey   string
		fieldValue string
		wantCmd    string
		wantMap    map[float64]float64
		wantErr    bool
	}{
		{
			name:       "simple",
			fieldKey:   pingKey,
			fieldValue: "p50=0.001,p99=1.003,p99.9=3.007",
			wantCmd:    "ping",
			wantMap:    map[float64]float64{50.0: 0.001, 99.0: 1.003, 99.9: 3.007},
		},
		{
			name:       "single-percentile",
			fieldKey:   pingKey,
			fieldValue: "p50=0.001",
			wantCmd:    "ping",
			wantMap:    map[float64]float64{50.0: 0.001},
		},
		{
			// An empty value is split into one empty token, which is parsed
			// as percentile 0 with value 0 and no error.
			name:       "empty",
			fieldKey:   pingKey,
			fieldValue: "",
			wantCmd:    "ping",
			wantMap:    map[float64]float64{0: 0},
		},
		{
			name:       "invalid-percentile",
			fieldKey:   pingKey,
			fieldValue: "p50=a",
			wantCmd:    "ping",
			wantMap:    map[float64]float64{},
			wantErr:    true,
		},
		{
			name:       "invalid prefix",
			fieldKey:   "wrong_prefix_",
			fieldValue: "p50=0.001,p99=1.003,p99.9=3.007",
			wantCmd:    "",
			wantMap:    map[float64]float64{},
			wantErr:    true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cmd, percentiles, err := parseMetricsLatencyStats(tc.fieldKey, tc.fieldValue)

			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("test %s. parseMetricsLatencyStats() error = %v, wantErr %v", tc.name, err, tc.wantErr)
				return
			}
			if cmd != tc.wantCmd {
				t.Errorf("parseMetricsLatencyStats() gotCmd = %v, want %v", cmd, tc.wantCmd)
			}
			if !reflect.DeepEqual(percentiles, tc.wantMap) {
				t.Errorf("parseMetricsLatencyStats() gotPercentileMap = %v, want %v", percentiles, tc.wantMap)
			}
		})
	}
}
15 changes: 15 additions & 0 deletions exporter/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,18 @@ func (e *Exporter) registerConstMetric(ch chan<- prometheus.Metric, metric strin
ch <- m
}
}

// registerConstSummary builds a constant Prometheus summary for metric and
// sends it on ch.
//
// The description is looked up in e.metricDescriptions; if none is registered
// a new one is created from the exporter namespace, the metric name and
// labelValues (NOTE(review): despite its name, the visible caller passes
// []string{"cmd"} here, which looks like label names; confirm against
// newMetricDescr). count and sum are the summary's sample count and sample
// sum, latencyMap holds its quantile -> value pairs and cmd is the single
// label value.
//
// If the summary cannot be constructed it is dropped instead of panicking.
func (e *Exporter) registerConstSummary(ch chan<- prometheus.Metric, metric string, labelValues []string, count uint64, sum float64, latencyMap map[float64]float64, cmd string) {
	descr := e.metricDescriptions[metric]
	if descr == nil {
		descr = newMetricDescr(e.options.Namespace, metric, metric+" metric", labelValues)
	}

	// Create a constant summary from values we got from a 3rd party telemetry system.
	// NewConstSummary (rather than MustNewConstSummary) is used so that a
	// rejected summary yields an error that can be handled here.
	s, err := prometheus.NewConstSummary(
		descr,
		count, sum,
		latencyMap,
		cmd,
	)
	if err != nil {
		return
	}
	ch <- s
}