Skip to content

Commit

Permalink
Added latency_percentiles_usec summary (A summary of latency percenti…
Browse files Browse the repository at this point in the history
…le distribution per command) (#652)
  • Loading branch information
filipecosta90 authored May 30, 2022
1 parent 29d12ac commit 123fca5
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 3 deletions.
8 changes: 8 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ services:
ports:
- 6379

# Redis 7 service. redis-server must listen on the same port that is
# advertised under "ports" (and used by TEST_REDIS7_URI), otherwise it
# falls back to its default port 6379 and the advertised port is unused.
- name: redis7
  image: redis:7
  pull: if-not-exists
  commands:
    - "redis-server --port 6384 --protected-mode no --dbfilename dump7.rdb"
  ports:
    - 6384

- name: pwd-redis5
image: redis:5
pull: if-not-exists
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ test:
TEST_REDIS_URI="redis://redis6:6379" \
TEST_REDIS5_URI="redis://redis5:6383" \
TEST_REDIS6_URI="redis://redis6:6379" \
TEST_REDIS7_URI="redis://redis7:6384" \
TEST_REDIS_2_8_URI="redis://redis-2-8:6381" \
TEST_KEYDB01_URI="redis://keydb-01:6401" \
TEST_KEYDB02_URI="redis://keydb-02:6402" \
Expand Down
6 changes: 6 additions & 0 deletions contrib/docker-compose-for-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ services:
ports:
- "6379"

# Redis 7 service. redis-server is told to listen on the port exposed
# below; without --port it would listen on its default port 6379.
redis7:
  image: redis:7.0
  command: "redis-server --port 6384 --protected-mode no --dbfilename dump7.rdb"
  ports:
    - "6384"

pwd-redis5:
image: redis:5
command: "redis-server --port 6380 --requirepass redis-password --dbfilename dump5-pwd.rdb"
Expand Down
1 change: 1 addition & 0 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ func NewRedisExporter(redisURI string, opts Options) (*Exporter, error) {
"commands_failed_calls_total": {txt: `Total number of errors prior command execution per command`, lbls: []string{"cmd"}},
"commands_rejected_calls_total": {txt: `Total number of errors within command execution per command`, lbls: []string{"cmd"}},
"commands_total": {txt: `Total number of calls per command`, lbls: []string{"cmd"}},
"latency_percentiles_usec": {txt: `A summary of latency percentile distribution per command`, lbls: []string{"cmd"}},
"config_key_value": {txt: `Config key and value`, lbls: []string{"key", "value"}},
"config_value": {txt: `Config key and value as metric`, lbls: []string{"key"}},
"connected_clients_details": {txt: "Details about connected clients", lbls: connectedClientsLabels},
Expand Down
102 changes: 99 additions & 3 deletions exporter/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,25 @@ func extractVal(s string) (val float64, err error) {
return
}

// extractPercentileVal parses a single "<key>=<value>" pair such as
// "p99.9=4.015" and returns the percentile (99.9) and its value (4.015).
//
// The first byte of the key is dropped without being inspected (the "p" in
// the example above) and the remainder is parsed as a float. An empty key,
// e.g. "=1.0", is reported as a parse error instead of being sliced, which
// would panic.
//
// If s does not split into exactly two parts around "=", all results are zero
// and err is nil; callers cannot tell that case apart from a genuine "p0=0".
func extractPercentileVal(s string) (percentile float64, val float64, err error) {
	parts := strings.Split(s, "=")
	if len(parts) != 2 {
		return 0, 0, nil
	}

	// Strip the one-byte prefix only when there is something to strip; an
	// empty key is handed to ParseFloat as-is so that it yields an error.
	key := parts[0]
	if key != "" {
		key = key[1:]
	}

	percentile, err = strconv.ParseFloat(key, 64)
	if err != nil {
		return percentile, 0, err
	}

	val, err = strconv.ParseFloat(parts[1], 64)
	return percentile, val, err
}

func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string, dbCount int) {
keyValues := map[string]string{}
handledDBs := map[string]bool{}
cmdCount := map[string]uint64{}
cmdSum := map[string]float64{}
cmdLatencyMap := map[string]map[float64]float64{}

fieldClass := ""
lines := strings.Split(info, "\n")
Expand Down Expand Up @@ -70,7 +86,13 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
e.handleMetricsServer(ch, fieldKey, fieldValue)

case "Commandstats":
e.handleMetricsCommandStats(ch, fieldKey, fieldValue)
cmd, calls, usecsTotal := e.handleMetricsCommandStats(ch, fieldKey, fieldValue)
cmdCount[cmd] = uint64(calls)
cmdSum[cmd] = usecsTotal
continue

case "Latencystats":
e.handleMetricsLatencyStats(fieldKey, fieldValue, cmdLatencyMap)
continue

case "Errorstats":
Expand Down Expand Up @@ -102,6 +124,10 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
e.parseAndRegisterConstMetric(ch, fieldKey, fieldValue)
}

// To be able to generate the latency summaries we need the count and sum that we get
// from #Commandstats processing and the percentile info that we get from the #Latencystats processing
e.generateCommandLatencySummaries(ch, cmdLatencyMap, cmdCount, cmdSum)

for dbIndex := 0; dbIndex < dbCount; dbIndex++ {
dbName := "db" + strconv.Itoa(dbIndex)
if _, exists := handledDBs[dbName]; !exists {
Expand All @@ -128,6 +154,16 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
}
}

// generateCommandLatencySummaries registers one "latency_percentiles_usec"
// summary for every command in cmdLatencyMap that also has an entry in both
// cmdCount and cmdSum. Commands missing from either of those maps are skipped.
func (e *Exporter) generateCommandLatencySummaries(ch chan<- prometheus.Metric, cmdLatencyMap map[string]map[float64]float64, cmdCount map[string]uint64, cmdSum map[string]float64) {
	for name, percentiles := range cmdLatencyMap {
		calls, haveCalls := cmdCount[name]
		if !haveCalls {
			continue
		}
		total, haveTotal := cmdSum[name]
		if !haveTotal {
			continue
		}
		e.registerConstSummary(ch, "latency_percentiles_usec", []string{"cmd"}, calls, total, percentiles, name)
	}
}

func (e *Exporter) extractClusterInfoMetrics(ch chan<- prometheus.Metric, info string) {
lines := strings.Split(info, "\r\n")

Expand Down Expand Up @@ -347,6 +383,57 @@ func parseMetricsCommandStats(fieldKey string, fieldValue string) (cmd string, c
return
}

// parseMetricsLatencyStats parses one key/value line of the "# Latencystats"
// section.
//
// fieldKey must start with "latency_percentiles_usec_"; the remainder is
// returned as cmd. fieldValue is a comma-separated list of pairs, each of
// which is handed to extractPercentileVal, and the results are collected into
// percentileMap (percentile -> value).
//
// percentileMap is never nil. On error it is empty: a partially filled map is
// not handed back. cmd is still returned when only the value part is invalid.
func parseMetricsLatencyStats(fieldKey string, fieldValue string) (cmd string, percentileMap map[float64]float64, errorOut error) {
	/*
		# Latencystats
		latency_percentiles_usec_rpop:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_zadd:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_hset:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_set:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_lpop:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_lpush:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_lrange:p50=17.023,p99=21.119,p99.9=27.007
		latency_percentiles_usec_get:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_mset:p50=1.003,p99=1.003,p99.9=1.003
		latency_percentiles_usec_spop:p50=0.001,p99=1.003,p99.9=1.003
		latency_percentiles_usec_incr:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_rpush:p50=0.001,p99=1.003,p99.9=4.015
		latency_percentiles_usec_zpopmin:p50=0.001,p99=1.003,p99.9=3.007
		latency_percentiles_usec_config|resetstat:p50=280.575,p99=280.575,p99.9=280.575
		latency_percentiles_usec_config|get:p50=8.031,p99=27.007,p99.9=27.007
		latency_percentiles_usec_ping:p50=0.001,p99=1.003,p99.9=1.003
		latency_percentiles_usec_sadd:p50=0.001,p99=1.003,p99.9=3.007
		broken up like this:
			fieldKey  = latency_percentiles_usec_ping
			fieldValue= p50=0.001,p99=1.003,p99.9=3.007
	*/

	const cmdPrefix = "latency_percentiles_usec_"

	if !strings.HasPrefix(fieldKey, cmdPrefix) {
		return "", map[float64]float64{}, errors.New("Invalid fieldKey")
	}
	cmd = strings.TrimPrefix(fieldKey, cmdPrefix)

	// strings.Split with a non-empty separator always yields at least one
	// element (a single "" for empty input), so no emptiness check is needed.
	pairs := strings.Split(fieldValue, ",")
	percentileMap = make(map[float64]float64, len(pairs))
	for pos, kv := range pairs {
		percentile, value, err := extractPercentileVal(kv)
		if err != nil {
			// Discard anything collected so far so the caller never sees a
			// half-populated map together with an error.
			return cmd, map[float64]float64{}, fmt.Errorf("Invalid splitValue[%d]", pos)
		}
		percentileMap[percentile] = value
	}
	return cmd, percentileMap, nil
}

func parseMetricsErrorStats(fieldKey string, fieldValue string) (errorType string, count float64, errorOut error) {
/*
Format:
Expand All @@ -373,15 +460,24 @@ func parseMetricsErrorStats(fieldKey string, fieldValue string) (errorType strin
return
}

func (e *Exporter) handleMetricsCommandStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) {
if cmd, calls, rejectedCalls, failedCalls, usecTotal, extendedStats, err := parseMetricsCommandStats(fieldKey, fieldValue); err == nil {
// handleMetricsCommandStats parses one command-stats line via
// parseMetricsCommandStats and, when parsing succeeds, registers the
// per-command counters: total calls, total duration in seconds
// (usecTotal/1e6) and, if extended stats were reported, the rejected and
// failed call counts.
//
// It returns cmd, calls and usecTotal exactly as parseMetricsCommandStats
// produced them, whether or not parsing succeeded.
func (e *Exporter) handleMetricsCommandStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) (cmd string, calls float64, usecTotal float64) {
	cmd, calls, rejected, failed, usecTotal, hasExtended, parseErr := parseMetricsCommandStats(fieldKey, fieldValue)
	if parseErr != nil {
		return cmd, calls, usecTotal
	}

	e.registerConstMetric(ch, "commands_total", calls, prometheus.CounterValue, cmd)
	e.registerConstMetric(ch, "commands_duration_seconds_total", usecTotal/1e6, prometheus.CounterValue, cmd)

	// Rejected/failed counters are only emitted when the parser flagged the
	// line as carrying extended stats.
	if hasExtended {
		e.registerConstMetric(ch, "commands_rejected_calls_total", rejected, prometheus.CounterValue, cmd)
		e.registerConstMetric(ch, "commands_failed_calls_total", failed, prometheus.CounterValue, cmd)
	}
	return cmd, calls, usecTotal
}

// handleMetricsLatencyStats parses one latency-stats line with
// parseMetricsLatencyStats and stores the resulting percentile map in
// cmdLatencyMap under the command name. Lines that fail to parse are ignored
// and leave cmdLatencyMap untouched.
func (e *Exporter) handleMetricsLatencyStats(fieldKey string, fieldValue string, cmdLatencyMap map[string]map[float64]float64) {
	name, percentiles, parseErr := parseMetricsLatencyStats(fieldKey, fieldValue)
	if parseErr != nil {
		return
	}
	cmdLatencyMap[name] = percentiles
}

func (e *Exporter) handleMetricsErrorStats(ch chan<- prometheus.Metric, fieldKey string, fieldValue string) {
Expand Down
78 changes: 78 additions & 0 deletions exporter/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"net/http/httptest"
"os"
"reflect"
"regexp"
"strings"
"testing"
Expand Down Expand Up @@ -103,6 +104,18 @@ func TestCommandStats(t *testing.T) {
deleteKeysFromDB(t, redisSixTwoAddr)
}

// TestLatencyStats runs commandStatsCheck against the instance addressed by
// TEST_REDIS7_URI, looking for the "redis_latency_percentiles_usec" metric.
func TestLatencyStats(t *testing.T) {
	addr := os.Getenv("TEST_REDIS7_URI")

	// Since Redis v7 we should have extended latency stats (summary of command latencies)
	exp := getTestExporterWithAddr(addr)
	setupDBKeys(t, addr)

	commandStatsCheck(t, exp, map[string]bool{"redis_latency_percentiles_usec": false})
	deleteKeysFromDB(t, addr)
}

func commandStatsCheck(t *testing.T, e *Exporter, want map[string]bool) {
chM := make(chan prometheus.Metric)
go func() {
Expand Down Expand Up @@ -373,3 +386,68 @@ func TestParseErrorStats(t *testing.T) {
}

}

// Test_parseMetricsLatencyStats is a table-driven test for
// parseMetricsLatencyStats covering well-formed input, a single pair, an
// empty value, a non-numeric value and a key with the wrong prefix.
func Test_parseMetricsLatencyStats(t *testing.T) {
	cases := []struct {
		name              string
		fieldKey          string
		fieldValue        string
		wantCmd           string
		wantPercentileMap map[float64]float64
		wantErr           bool
	}{
		{
			name:              "simple",
			fieldKey:          "latency_percentiles_usec_ping",
			fieldValue:        "p50=0.001,p99=1.003,p99.9=3.007",
			wantCmd:           "ping",
			wantPercentileMap: map[float64]float64{50.0: 0.001, 99.0: 1.003, 99.9: 3.007},
		},
		{
			name:              "single-percentile",
			fieldKey:          "latency_percentiles_usec_ping",
			fieldValue:        "p50=0.001",
			wantCmd:           "ping",
			wantPercentileMap: map[float64]float64{50.0: 0.001},
		},
		{
			// An empty value currently parses to a single 0 -> 0 entry
			// without an error.
			name:              "empty",
			fieldKey:          "latency_percentiles_usec_ping",
			fieldValue:        "",
			wantCmd:           "ping",
			wantPercentileMap: map[float64]float64{0: 0},
		},
		{
			name:              "invalid-percentile",
			fieldKey:          "latency_percentiles_usec_ping",
			fieldValue:        "p50=a",
			wantCmd:           "ping",
			wantPercentileMap: map[float64]float64{},
			wantErr:           true,
		},
		{
			name:              "invalid prefix",
			fieldKey:          "wrong_prefix_",
			fieldValue:        "p50=0.001,p99=1.003,p99.9=3.007",
			wantCmd:           "",
			wantPercentileMap: map[float64]float64{},
			wantErr:           true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cmd, percentiles, err := parseMetricsLatencyStats(tc.fieldKey, tc.fieldValue)

			// Stop at an unexpected error state; the remaining checks would
			// only add noise.
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("test %s. parseMetricsLatencyStats() error = %v, wantErr %v", tc.name, err, tc.wantErr)
				return
			}
			if cmd != tc.wantCmd {
				t.Errorf("parseMetricsLatencyStats() gotCmd = %v, want %v", cmd, tc.wantCmd)
			}
			if !reflect.DeepEqual(percentiles, tc.wantPercentileMap) {
				t.Errorf("parseMetricsLatencyStats() gotPercentileMap = %v, want %v", percentiles, tc.wantPercentileMap)
			}
		})
	}
}
15 changes: 15 additions & 0 deletions exporter/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,18 @@ func (e *Exporter) registerConstMetric(ch chan<- prometheus.Metric, metric strin
ch <- m
}
}

// registerConstSummary builds a constant Prometheus summary for metric and
// sends it on ch.
//
// The descriptor is looked up in e.metricDescriptions; if none is registered
// for metric, one is created with newMetricDescr using labelValues as its
// label list. count and sum are the summary's observation count and sum,
// latencyMap supplies its quantile -> value pairs, and cmd is passed as the
// single label value.
//
// If the summary cannot be constructed (for example because the descriptor's
// labels do not match the single label value supplied), nothing is sent: a
// malformed metric must not panic and take the whole scrape down.
//
// NOTE(review): the only caller visible here passes percentiles such as 50 and
// 99.9 as latencyMap keys, whereas Prometheus quantiles are conventionally in
// the 0..1 range — confirm this is intended.
func (e *Exporter) registerConstSummary(ch chan<- prometheus.Metric, metric string, labelValues []string, count uint64, sum float64, latencyMap map[float64]float64, cmd string) {
	descr := e.metricDescriptions[metric]
	if descr == nil {
		descr = newMetricDescr(e.options.Namespace, metric, metric+" metric", labelValues)
	}

	// Create a constant summary from values we got from a 3rd party telemetry system.
	// NewConstSummary is used instead of MustNewConstSummary so that an
	// inconsistent descriptor/label combination is skipped rather than panicking.
	s, err := prometheus.NewConstSummary(descr, count, sum, latencyMap, cmd)
	if err != nil {
		return
	}
	ch <- s
}

0 comments on commit 123fca5

Please sign in to comment.