diff --git a/pkg/cmd/roachprod-microbench/BUILD.bazel b/pkg/cmd/roachprod-microbench/BUILD.bazel index 9d708f17b9a3..2055ccab6e09 100644 --- a/pkg/cmd/roachprod-microbench/BUILD.bazel +++ b/pkg/cmd/roachprod-microbench/BUILD.bazel @@ -11,6 +11,7 @@ go_library( "main.go", "metadata.go", "report.go", + "roachprod_util.go", "slack.go", "stage.go", ], diff --git a/pkg/cmd/roachprod-microbench/executor.go b/pkg/cmd/roachprod-microbench/executor.go index dd325a557b5a..bba409d31116 100644 --- a/pkg/cmd/roachprod-microbench/executor.go +++ b/pkg/cmd/roachprod-microbench/executor.go @@ -109,7 +109,7 @@ func newExecutor(config executorConfig) (*executor, error) { roachprodConfig.Quiet = config.quiet timestamp := timeutil.Now() - l := util.InitLogger(filepath.Join(config.outputDir, fmt.Sprintf("roachprod-microbench-%s.log", timestamp.Format(util.TimeFormat)))) + l := InitLogger(filepath.Join(config.outputDir, fmt.Sprintf("roachprod-microbench-%s.log", timestamp.Format(util.TimeFormat)))) excludeBenchmarks := util.GetRegexExclusionPairs(config.excludeList) return &executor{ @@ -275,7 +275,7 @@ func (e *executor) executeBenchmarks() error { } // Init `roachprod` and get the number of nodes in the cluster. - util.InitRoachprod() + InitRoachprod() statuses, err := roachprod.Status(context.Background(), e.log, e.cluster, "") if err != nil { return err diff --git a/pkg/cmd/roachprod-microbench/roachprod_util.go b/pkg/cmd/roachprod-microbench/roachprod_util.go new file mode 100644 index 000000000000..af2f4a900695 --- /dev/null +++ b/pkg/cmd/roachprod-microbench/roachprod_util.go @@ -0,0 +1,46 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package main + +import ( + "context" + "fmt" + "os" + + "github.com/cockroachdb/cockroach/pkg/roachprod" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/roachprod/logger" +) + +// InitRoachprod initializes the roachprod providers by calling InitProviders. +// This function sets up the environment for running roachprod commands. +func InitRoachprod() { + _ = roachprod.InitProviders() +} + +// RoachprodRun runs a command on a roachprod cluster with the given cluster name and logger. +// It takes a list of command arguments and passes them to the roachprod command execution. +func RoachprodRun(clusterName string, l *logger.Logger, cmdArray []string) error { + // Execute the roachprod command with the provided context, logger, cluster name, and options. + return roachprod.Run( + context.Background(), l, clusterName, "", "", false, + os.Stdout, os.Stderr, cmdArray, install.DefaultRunOptions(), + ) +} + +// InitLogger initializes and returns a logger based on the provided log file path. +// If the logger configuration fails, the program prints an error and exits. +func InitLogger(path string) *logger.Logger { + loggerCfg := logger.Config{Stdout: os.Stdout, Stderr: os.Stderr} // Create a logger config with standard output and error. + var loggerError error + l, loggerError := loggerCfg.NewLogger(path) // Create a new logger based on the configuration. + if loggerError != nil { + // If there is an error initializing the logger, print the error message and exit the program. + _, _ = fmt.Fprintf(os.Stderr, "unable to configure logger: %s\n", loggerError) + os.Exit(1) + } + return l // Return the initialized logger. +} diff --git a/pkg/cmd/roachprod-microbench/stage.go b/pkg/cmd/roachprod-microbench/stage.go index e56aaa441377..a0c3cd5cd745 100644 --- a/pkg/cmd/roachprod-microbench/stage.go +++ b/pkg/cmd/roachprod-microbench/stage.go @@ -11,7 +11,6 @@ import ( "path" "strings" - "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/util" "github.com/cockroachdb/cockroach/pkg/roachprod" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/errors" @@ -22,7 +21,7 @@ import ( func stage(cluster, archivePath, remoteDest string) (err error) { ctx := context.Background() - util.InitRoachprod() + InitRoachprod() loggerCfg := logger.Config{Stdout: os.Stdout, Stderr: os.Stderr} l, err := loggerCfg.NewLogger("") if err != nil { @@ -34,22 +33,22 @@ func stage(cluster, archivePath, remoteDest string) (err error) { defer func() { // Remove the remote archive after we're done. - cleanUpErr := util.RoachprodRun(cluster, l, []string{"rm", "-rf", archiveRemotePath}) + cleanUpErr := RoachprodRun(cluster, l, []string{"rm", "-rf", archiveRemotePath}) err = errors.CombineErrors(err, errors.Wrapf(cleanUpErr, "removing remote archive: %s", archiveRemotePath)) }() // Remove the remote archive and destination directory if they exist. - if err = util.RoachprodRun(cluster, l, []string{"rm", "-rf", archiveRemotePath}); err != nil { + if err = RoachprodRun(cluster, l, []string{"rm", "-rf", archiveRemotePath}); err != nil { return errors.Wrapf(err, "removing remote archive: %s", archiveRemotePath) } - if err = util.RoachprodRun(cluster, l, []string{"rm", "-rf", remoteDest}); err != nil { + if err = RoachprodRun(cluster, l, []string{"rm", "-rf", remoteDest}); err != nil { return errors.Wrapf(err, "removing remote destination: %s", remoteDest) } // Copy the archive to the remote machine. copyFromGCS := strings.HasPrefix(archivePath, "gs://") if copyFromGCS { - if err = util.RoachprodRun(cluster, l, []string{"gsutil", "-q", "-m", "cp", archivePath, archiveRemotePath}); err != nil { + if err = RoachprodRun(cluster, l, []string{"gsutil", "-q", "-m", "cp", archivePath, archiveRemotePath}); err != nil { return errors.Wrapf(err, "copying archive from GCS: %s", archivePath) } } else { @@ -59,10 +58,10 @@ func stage(cluster, archivePath, remoteDest string) (err error) { } // Extract the archive on the remote machine. - if err = util.RoachprodRun(cluster, l, []string{"mkdir", "-p", remoteDest}); err != nil { + if err = RoachprodRun(cluster, l, []string{"mkdir", "-p", remoteDest}); err != nil { return errors.Wrapf(err, "creating remote destination: %s", remoteDest) } - if err = util.RoachprodRun(cluster, l, []string{"tar", "-C", remoteDest, "-xzf", archiveRemotePath}); err != nil { + if err = RoachprodRun(cluster, l, []string{"tar", "-C", remoteDest, "-xzf", archiveRemotePath}); err != nil { return errors.Wrapf(err, "extracting archive: %s", archiveRemotePath) } diff --git a/pkg/cmd/roachprod-microbench/util/BUILD.bazel b/pkg/cmd/roachprod-microbench/util/BUILD.bazel index cf9b1129a9a0..319a7c5651ce 100644 --- a/pkg/cmd/roachprod-microbench/util/BUILD.bazel +++ b/pkg/cmd/roachprod-microbench/util/BUILD.bazel @@ -6,9 +6,6 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/util", visibility = ["//visibility:public"], deps = [ - "//pkg/roachprod", - "//pkg/roachprod/install", - "//pkg/roachprod/logger", "@com_github_spf13_cobra//:cobra", "@org_golang_x_exp//maps", ], diff --git a/pkg/cmd/roachprod-microbench/util/util.go b/pkg/cmd/roachprod-microbench/util/util.go index 1667159fe6d0..6e3522fe323d 100644 --- a/pkg/cmd/roachprod-microbench/util/util.go +++ b/pkg/cmd/roachprod-microbench/util/util.go @@ -6,16 +6,12 @@ package util import ( - "context" "fmt" "os" "regexp" "sort" "strings" - "github.com/cockroachdb/cockroach/pkg/roachprod" - "github.com/cockroachdb/cockroach/pkg/roachprod/install" - "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/spf13/cobra" "golang.org/x/exp/maps" ) @@ -25,13 +21,15 @@ const TimeFormat = "2006-01-02T15_04_05" const PackageSeparator = "→" var ( - // invalidCharRegex matches + // invalidCharKeyRegex matches // the first character if it is not a letter (a-z, A-Z) or an underscore (_) // or // any character that is not a letter (a-z, A-Z), digit (0-9), or underscore (_). - invalidCharKeyRegex = regexp.MustCompile(`(^[^a-zA-Z_])|([^a-zA-Z0-9_])`) - // invalidCharValueRegex + invalidCharKeyRegex = regexp.MustCompile(`(^[^a-zA-Z_])|([^a-zA-Z0-9_])`) invalidCharValueRegex = regexp.MustCompile(`[\\\n"]`) + + // invalidMetricNameRegex matches any other character other than _, :, characters and digits + invalidMetricNameRegex = regexp.MustCompile(`[^a-zA-Z0-9_:]`) ) // LabelMapToString converts a map of labels (key-value pairs) into a formatted string. @@ -55,6 +53,11 @@ func SanitizeKey(input string) string { return invalidCharKeyRegex.ReplaceAllString(input, "_") } +// SanitizeMetricName replaces all invalid characters in input string with underscores +func SanitizeMetricName(input string) string { + return invalidMetricNameRegex.ReplaceAllString(input, "_") +} + // SanitizeValue replaces all \,\n and " with underscores (_). func SanitizeValue(input string) string { // Replace all characters that match as per the regex with an underscore. @@ -120,33 +123,3 @@ func GetRegexExclusionPairs(excludeList []string) [][]*regexp.Regexp { } return excludeRegexes } - -// InitRoachprod initializes the roachprod providers by calling InitProviders. -// This function sets up the environment for running roachprod commands. -func InitRoachprod() { - _ = roachprod.InitProviders() -} - -// RoachprodRun runs a command on a roachprod cluster with the given cluster name and logger. -// It takes a list of command arguments and passes them to the roachprod command execution. -func RoachprodRun(clusterName string, l *logger.Logger, cmdArray []string) error { - // Execute the roachprod command with the provided context, logger, cluster name, and options. - return roachprod.Run( - context.Background(), l, clusterName, "", "", false, - os.Stdout, os.Stderr, cmdArray, install.DefaultRunOptions(), - ) -} - -// InitLogger initializes and returns a logger based on the provided log file path. -// If the logger configuration fails, the program prints an error and exits. -func InitLogger(path string) *logger.Logger { - loggerCfg := logger.Config{Stdout: os.Stdout, Stderr: os.Stderr} // Create a logger config with standard output and error. - var loggerError error - l, loggerError := loggerCfg.NewLogger(path) // Create a new logger based on the configuration. - if loggerError != nil { - // If there is an error initializing the logger, print the error message and exit the program. - _, _ = fmt.Fprintf(os.Stderr, "unable to configure logger: %s\n", loggerError) - os.Exit(1) - } - return l // Return the initialized logger. -} diff --git a/pkg/cmd/roachtest/clusterstats/BUILD.bazel b/pkg/cmd/roachtest/clusterstats/BUILD.bazel index 73be50eb46d8..b373a1ffe23a 100644 --- a/pkg/cmd/roachtest/clusterstats/BUILD.bazel +++ b/pkg/cmd/roachtest/clusterstats/BUILD.bazel @@ -36,6 +36,7 @@ go_library( "@com_github_prometheus_client_golang//api", "@com_github_prometheus_client_golang//api/prometheus/v1:prometheus", "@com_github_prometheus_common//model", + "@org_golang_x_exp//maps", ], ) diff --git a/pkg/cmd/roachtest/clusterstats/exporter.go b/pkg/cmd/roachtest/clusterstats/exporter.go index 4a8f5fcc95b4..25a029ff9062 100644 --- a/pkg/cmd/roachtest/clusterstats/exporter.go +++ b/pkg/cmd/roachtest/clusterstats/exporter.go @@ -11,6 +11,7 @@ import ( "encoding/json" "fmt" "path/filepath" + "strings" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/util" @@ -21,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + "golang.org/x/exp/maps" ) // ClusterStat represents a filtered query by the given LabelName. For example, @@ -128,7 +130,7 @@ func (r *ClusterStatRun) serializeOpenmetricsOutRun( ctx context.Context, t test.Test, c cluster.Cluster, ) error { - labelString := GetDefaultOpenmetricsLabelString(t, c) + labelString := GetOpenmetricsLabelString(t, c, nil) report, err := serializeOpenmetricsReport(*r, &labelString) if err != nil { return errors.Wrap(err, "failed to serialize perf artifacts") @@ -142,13 +144,13 @@ func serializeOpenmetricsReport(r ClusterStatRun, labelString *string) (*bytes.B // Emit summary metrics from Total for key, value := range r.Total { - buffer.WriteString(fmt.Sprintf("# TYPE %s gauge\n", util.SanitizeKey(key))) + buffer.WriteString(fmt.Sprintf("# TYPE %s gauge\n", util.SanitizeMetricName(key))) buffer.WriteString(fmt.Sprintf("%s{%s} %f %d\n", util.SanitizeKey(key), *labelString, value, timeutil.Now().UTC().Unix())) } // Emit histogram metrics from Stats for _, stat := range r.Stats { - buffer.WriteString(fmt.Sprintf("# TYPE %s gauge\n", util.SanitizeKey(stat.Tag))) + buffer.WriteString(fmt.Sprintf("# TYPE %s gauge\n", util.SanitizeMetricName(stat.Tag))) for i, timestamp := range stat.Time { t := timeutil.Unix(0, timestamp) buffer.WriteString( @@ -397,15 +399,49 @@ func (cs *clusterStatCollector) getStatSummary( return ret, nil } -func GetDefaultOpenmetricsLabelString(t test.Test, c cluster.Cluster) string { - return util.LabelMapToString(GetDefaultOpenmetricsLabelMap(t, c)) +// GetOpenmetricsLabelString creates a string that follows the openmetrics labels format +func GetOpenmetricsLabelString(t test.Test, c cluster.Cluster, labels map[string]string) string { + return util.LabelMapToString(GetOpenmetricsLabelMap(t, c, labels)) } -func GetDefaultOpenmetricsLabelMap(t test.Test, c cluster.Cluster) map[string]string { - return map[string]string{ - "test": t.Name(), +// GetOpenmetricsLabelMap creates a map of label keys and values +// It takes roachtest parameters and create relevant labels. +// Test name is split and each split is added as a subtype +func GetOpenmetricsLabelMap( + t test.Test, c cluster.Cluster, labels map[string]string, +) map[string]string { + defaultMap := map[string]string{ "cloud": c.Cloud().String(), "owner": string(t.Spec().(*registry.TestSpec).Owner), "suite": t.Spec().(*registry.TestSpec).Suites.String(), } + + // Since the roachtest have / delimiter for subtests + // Partitioning the name by '/' + testNameArray := strings.Split(t.Name(), "/") + defaultMap["test"] = testNameArray[0] + + subTestIterator := 1 + for i := 1; i < len(testNameArray); i++ { + testSubName := testNameArray[i] + + // If the partition has '=', add the key as a label itself + // and the value as its value + if strings.Contains(testSubName, "=") { + testLabels := strings.Split(testSubName, "=") + defaultMap[testLabels[0]] = testLabels[1] + } else { + + // Add the subtest label with the iterator since there can be nested subtests + testSubType := fmt.Sprintf("subtest-%d", subTestIterator) + subTestIterator++ + defaultMap[testSubType] = testSubName + } + } + + // If the tests has passed some custom labels, copy them to the map created above + if labels != nil { + maps.Copy(defaultMap, labels) + } + return defaultMap } diff --git a/pkg/cmd/roachtest/roachtestutil/BUILD.bazel b/pkg/cmd/roachtest/roachtestutil/BUILD.bazel index 18afad4db7a2..494eea23c4c2 100644 --- a/pkg/cmd/roachtest/roachtestutil/BUILD.bazel +++ b/pkg/cmd/roachtest/roachtestutil/BUILD.bazel @@ -19,6 +19,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/cmd/roachtest/cluster", + "//pkg/cmd/roachtest/clusterstats", "//pkg/cmd/roachtest/option", "//pkg/cmd/roachtest/spec", "//pkg/cmd/roachtest/test", @@ -36,6 +37,7 @@ go_library( "//pkg/util/retry", "//pkg/util/syncutil", "//pkg/util/timeutil", + "//pkg/workload/histogram/exporter", "@com_github_cockroachdb_errors//:errors", "@com_github_stretchr_testify//require", ], diff --git a/pkg/cmd/roachtest/roachtestutil/utils.go b/pkg/cmd/roachtest/roachtestutil/utils.go index 0cdf7bfb4f12..f2fd49fa3719 100644 --- a/pkg/cmd/roachtest/roachtestutil/utils.go +++ b/pkg/cmd/roachtest/roachtestutil/utils.go @@ -6,21 +6,25 @@ package roachtestutil import ( + "bytes" "context" "fmt" "io" "net/http" "os" + "path/filepath" "regexp" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/clusterstats" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/config" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" ) @@ -65,6 +69,72 @@ func (e *EveryN) ShouldLog() bool { return e.ShouldProcess(timeutil.Now()) } +// GetWorkloadHistogramArgs creates a histogram flag string based on the roachtest to pass to workload binary +// This is used to make use of t.ExportOpenmetrics() method and create appropriate exporter +func GetWorkloadHistogramArgs(t test.Test, c cluster.Cluster, labels map[string]string) string { + var histogramArgs string + if t.ExportOpenmetrics() { + // Add openmetrics related labels and arguments + histogramArgs = fmt.Sprintf(" --histogram-export-format='openmetrics' --histograms=%s/%s --openmetrics-labels='%s'", + t.PerfArtifactsDir(), GetBenchmarkMetricsFileName(t), clusterstats.GetOpenmetricsLabelString(t, c, labels)) + } else { + // Since default is json, no need to add --histogram-export-format flag in this case and also the labels + histogramArgs = fmt.Sprintf(" --histograms=%s/%s", GetBenchmarkMetricsFileName(t), t.PerfArtifactsDir()) + } + + return histogramArgs +} + +// GetBenchmarkMetricsFileName returns the file name to store the benchmark output +func GetBenchmarkMetricsFileName(t test.Test) string { + if t.ExportOpenmetrics() { + return "stats.om" + } + + return "stats.json" +} + +// CreateWorkloadHistogramExporter creates a exporter.Exporter based on the roachtest parameters +func CreateWorkloadHistogramExporter(t test.Test, c cluster.Cluster) exporter.Exporter { + var metricsExporter exporter.Exporter + if t.ExportOpenmetrics() { + labels := clusterstats.GetOpenmetricsLabelMap(t, c, nil) + openMetricsExporter := &exporter.OpenMetricsExporter{} + openMetricsExporter.SetLabels(&labels) + metricsExporter = openMetricsExporter + + } else { + metricsExporter = &exporter.HdrJsonExporter{} + } + + return metricsExporter +} + +func CreateStatsFileInClusterFromExporter( + ctx context.Context, + t test.Test, + c cluster.Cluster, + perfBuf *bytes.Buffer, + exporter exporter.Exporter, + node option.NodeListOption, +) (string, error) { + if err := exporter.Close(nil); err != nil { + return "", err + } + destinationFileName := GetBenchmarkMetricsFileName(t) + // Upload the perf artifacts to any one of the nodes so that the test + // runner copies it into an appropriate directory path. + dest := filepath.Join(t.PerfArtifactsDir(), destinationFileName) + if err := c.RunE(ctx, option.WithNodes(node), "mkdir -p "+filepath.Dir(dest)); err != nil { + t.L().ErrorfCtx(ctx, "failed to create perf dir: %+v", err) + } + if err := c.PutString(ctx, perfBuf.String(), dest, 0755, node); err != nil { + t.L().ErrorfCtx(ctx, "failed to upload perf artifacts to node: %s", err.Error()) + } + + return destinationFileName, nil +} + // WaitForReady waits until the given nodes report ready via health checks. // This implies that the node has completed server startup, is heartbeating its // liveness record, and can serve SQL clients. diff --git a/pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go b/pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go index b1078091ee93..7ffe59c14eec 100644 --- a/pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go +++ b/pkg/cmd/roachtest/tests/admission_control_disk_bandwidth_overload.go @@ -98,10 +98,16 @@ func registerDiskBandwidthOverload(r registry.Registry) { m.Go(func(ctx context.Context) error { t.Status(fmt.Sprintf("starting foreground kv workload thread (<%s)", time.Minute)) dur := " --duration=" + duration.String() + labels := map[string]string{ + "concurrency": "2", + "splits": "1000", + "read-percent": "50", + } url := fmt.Sprintf(" {pgurl%s}", c.CRDBNodes()) - cmd := "./cockroach workload run kv --histograms=perf/stats.json --concurrency=2 " + - "--splits=1000 --read-percent=50 --min-block-bytes=1024 --max-block-bytes=1024 " + - "--txn-qos='regular' --tolerate-errors" + foregroundDB + dur + url + cmd := fmt.Sprintf("./cockroach workload run kv %s--concurrency=2 "+ + "--splits=1000 --read-percent=50 --min-block-bytes=1024 --max-block-bytes=1024 "+ + "--txn-qos='regular' --tolerate-errors %s %s %s", + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), foregroundDB, dur, url) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) @@ -111,9 +117,13 @@ func registerDiskBandwidthOverload(r registry.Registry) { t.Status(fmt.Sprintf("starting background kv workload thread (<%s)", time.Minute)) dur := " --duration=" + duration.String() url := fmt.Sprintf(" {pgurl%s}", c.CRDBNodes()) - cmd := "./cockroach workload run kv --histograms=perf/stats.json --concurrency=1024 " + - "--read-percent=0 --min-block-bytes=4096 --max-block-bytes=4096 " + - "--txn-qos='background' --tolerate-errors" + backgroundDB + dur + url + labels := map[string]string{ + "concurrency": "1024", + "read-percent": "0", + } + cmd := fmt.Sprintf("./cockroach workload run kv %s --concurrency=1024 "+ + "--read-percent=0 --min-block-bytes=4096 --max-block-bytes=4096 "+ + "--txn-qos='background' --tolerate-errors %s %s %s", roachtestutil.GetWorkloadHistogramArgs(t, c, labels), backgroundDB, dur, url) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) diff --git a/pkg/cmd/roachtest/tests/admission_control_elastic_io.go b/pkg/cmd/roachtest/tests/admission_control_elastic_io.go index fbf441c319ac..f66aee2e0935 100644 --- a/pkg/cmd/roachtest/tests/admission_control_elastic_io.go +++ b/pkg/cmd/roachtest/tests/admission_control_elastic_io.go @@ -71,12 +71,17 @@ func registerElasticIO(r registry.Registry) { duration := 30 * time.Minute t.Status("running workload") m := c.NewMonitor(ctx, c.CRDBNodes()) + labels := map[string]string{ + "duration": fmt.Sprintf("%d", duration.Milliseconds()), + "concurrency": "512", + } m.Go(func(ctx context.Context) error { dur := " --duration=" + duration.String() url := fmt.Sprintf(" {pgurl%s}", c.CRDBNodes()) - cmd := "./cockroach workload run kv --init --histograms=perf/stats.json --concurrency=512 " + - "--splits=1000 --read-percent=0 --min-block-bytes=65536 --max-block-bytes=65536 " + - "--txn-qos=background --tolerate-errors --secure" + dur + url + cmd := fmt.Sprintf("./cockroach workload run kv --init %s -concurrency=512 "+ + "--splits=1000 --read-percent=0 --min-block-bytes=65536 --max-block-bytes=65536 "+ + "--txn-qos=background --tolerate-errors --secure %s %s", + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), dur, url) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) diff --git a/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go b/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go index 975251bafffe..1dec1fa84742 100644 --- a/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go +++ b/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go @@ -81,25 +81,31 @@ func registerElasticWorkloadMixedVersion(r registry.Registry) { "--min-block-bytes=512 --max-block-bytes=1024 {pgurl%s}", binary, c.Node(1))) } + labels := map[string]string{ + "concurrency": "500", + "read-percent": "5", + } // The workloads are tuned to keep the cluster busy at 30-40% CPU, and IO // overload metric approaching 20-30% which causes elastic traffic being // de-prioritized and wait. runForeground := func(ctx context.Context, duration time.Duration) error { cmd := roachtestutil.NewCommand("./cockroach workload run kv "+ - "--histograms=perf/stats.json --concurrency=500 "+ + "%s --concurrency=500 "+ "--max-rate=5000 --read-percent=5 "+ "--min-block-bytes=512 --max-block-bytes=1024 "+ "--txn-qos='regular' "+ - "--duration=%v {pgurl%s}", duration, c.CRDBNodes()) + "--duration=%v {pgurl%s}", roachtestutil.GetWorkloadHistogramArgs(t, c, labels), duration, c.CRDBNodes()) return c.RunE(ctx, option.WithNodes(c.WorkloadNode()), cmd.String()) } + + labels["read-percent"] = "0" runBackground := func(ctx context.Context, duration time.Duration) error { cmd := roachtestutil.NewCommand("./cockroach workload run kv "+ - "--histograms=perf/stats.json --concurrency=500 "+ + "%s --concurrency=500 "+ "--max-rate=10000 --read-percent=0 "+ "--min-block-bytes=2048 --max-block-bytes=4096 "+ "--txn-qos='background' "+ - "--duration=%v {pgurl%s}", duration, c.CRDBNodes()) + "--duration=%v {pgurl%s}", roachtestutil.GetWorkloadHistogramArgs(t, c, labels), duration, c.CRDBNodes()) return c.RunE(ctx, option.WithNodes(c.WorkloadNode()), cmd.String()) } runWorkloads := func(ctx2 context.Context) error { diff --git a/pkg/cmd/roachtest/tests/admission_control_multi_store_overload.go b/pkg/cmd/roachtest/tests/admission_control_multi_store_overload.go index 80c7d382fb93..cf2483e99e03 100644 --- a/pkg/cmd/roachtest/tests/admission_control_multi_store_overload.go +++ b/pkg/cmd/roachtest/tests/admission_control_multi_store_overload.go @@ -54,7 +54,10 @@ func registerMultiStoreOverload(r registry.Registry) { t.Status("running workload") dur := 20 * time.Minute duration := " --duration=" + roachtestutil.IfLocal(c, "10s", dur.String()) - histograms := " --histograms=" + t.PerfArtifactsDir() + "/stats.json" + labels := map[string]string{ + "duration": dur.String(), + } + histograms := " " + roachtestutil.GetWorkloadHistogramArgs(t, c, labels) m1 := c.NewMonitor(ctx, c.CRDBNodes()) m1.Go(func(ctx context.Context) error { dbRegular := " --db=db1" diff --git a/pkg/cmd/roachtest/tests/admission_control_snapshot_overload.go b/pkg/cmd/roachtest/tests/admission_control_snapshot_overload.go index 768cabe9d544..7260d20bd6b6 100644 --- a/pkg/cmd/roachtest/tests/admission_control_snapshot_overload.go +++ b/pkg/cmd/roachtest/tests/admission_control_snapshot_overload.go @@ -142,10 +142,16 @@ func registerSnapshotOverload(r registry.Registry) { m := c.NewMonitor(ctx, c.CRDBNodes()) m.Go(func(ctx context.Context) error { duration := " --duration=" + totalWorkloadDuration.String() - histograms := " --histograms=" + t.PerfArtifactsDir() + "/stats.json" concurrency := roachtestutil.IfLocal(c, " --concurrency=8", " --concurrency=256") maxRate := roachtestutil.IfLocal(c, " --max-rate=100", " --max-rate=12000") splits := roachtestutil.IfLocal(c, " --splits=10", " --splits=100") + + labels := map[string]string{ + "concurrency": concurrency, + "max-rate": maxRate, + "splits": splits, + } + histograms := roachtestutil.GetWorkloadHistogramArgs(t, c, labels) c.Run(ctx, option.WithNodes(c.WorkloadNode()), "./cockroach workload run kv --max-block-bytes=1 --read-percent=95 "+ histograms+duration+concurrency+maxRate+splits+fmt.Sprintf(" {pgurl%s}", c.CRDBNodes()), diff --git a/pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go b/pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go index 658a2dea174d..9f664be609d4 100644 --- a/pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go +++ b/pkg/cmd/roachtest/tests/admission_control_snapshot_overload_io.go @@ -8,6 +8,7 @@ package tests import ( "context" "fmt" + "strconv" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" @@ -192,12 +193,17 @@ func runAdmissionControlSnapshotOverloadIO( t.Status(fmt.Sprintf("starting kv workload thread (<%s)", time.Minute)) m := c.NewMonitor(ctx, c.CRDBNodes()) m.Go(func(ctx context.Context) error { + + labels := map[string]string{ + "concurrency": "4000", + "read-percent": strconv.Itoa(cfg.readPercent), + } c.Run(ctx, option.WithNodes(c.WorkloadNode()), fmt.Sprintf("./cockroach workload run kv --tolerate-errors "+ - "--splits=1000 --histograms=%s/stats.json --read-percent=%d "+ + "--splits=1000 %s --read-percent=%d "+ "--max-rate=600 --max-block-bytes=%d --min-block-bytes=%d "+ "--concurrency=4000 --duration=%s {pgurl:1-2}", - t.PerfArtifactsDir(), + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), cfg.readPercent, cfg.workloadBlockBytes, cfg.workloadBlockBytes, diff --git a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go index 8320f3b0eeca..ab80572aa07f 100644 --- a/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go +++ b/pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go @@ -8,12 +8,14 @@ package tests import ( "context" "fmt" + "strconv" "strings" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -57,16 +59,18 @@ func (s tpccOLAPSpec) run(ctx context.Context, t test.Test, c cluster.Cluster) { m := c.NewMonitor(ctx, c.CRDBNodes()) rampDuration := 2 * time.Minute duration := 3 * time.Minute + labels := getTpccLabels(s.Warehouses, rampDuration, duration, map[string]string{"concurrency": strconv.Itoa(s.Concurrency)}) m.Go(func(ctx context.Context) error { t.WorkerStatus("running querybench") cmd := fmt.Sprintf( "./workload run querybench --db tpcc"+ " --tolerate-errors=t"+ " --concurrency=%d"+ - " --query-file %s"+ - " --histograms="+t.PerfArtifactsDir()+"/stats.json "+ + " --query-file %s "+ + " %s"+ " --ramp=%s --duration=%s {pgurl:1-%d}", - s.Concurrency, queryFileName, rampDuration, duration, c.Spec().NodeCount-1) + s.Concurrency, queryFileName, roachtestutil.GetWorkloadHistogramArgs(t, c, labels), + rampDuration, duration, c.Spec().NodeCount-1) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) diff --git a/pkg/cmd/roachtest/tests/backup.go b/pkg/cmd/roachtest/tests/backup.go index e2176dc589ad..7abe08189218 100644 --- a/pkg/cmd/roachtest/tests/backup.go +++ b/pkg/cmd/roachtest/tests/backup.go @@ -10,12 +10,11 @@ import ( "context" gosql "database/sql" "encoding/base64" - "encoding/json" "fmt" + "io" "net" "net/url" "os" - "path/filepath" "strings" "time" @@ -39,6 +38,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/workload/histogram" + "github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" ) @@ -286,21 +286,28 @@ func fingerprint(ctx context.Context, conn *gosql.DB, db, table string) (string, // initBulkJobPerfArtifacts registers a histogram, creates a performance // artifact directory and returns a method that when invoked records a tick. -func initBulkJobPerfArtifacts(testName string, timeout time.Duration) (func(), *bytes.Buffer) { +func initBulkJobPerfArtifacts( + timeout time.Duration, t test.Test, e exporter.Exporter, +) (func(), *bytes.Buffer) { // Register a named histogram to track the total time the bulk job took. // Roachperf uses this information to display information about this // roachtest. - reg := histogram.NewRegistry( + + reg := histogram.NewRegistryWithExporter( timeout, histogram.MockWorkloadName, + e, ) - reg.GetHandle().Get(testName) + reg.GetHandle().Get(t.Name()) bytesBuf := bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(bytesBuf) + writer := io.Writer(bytesBuf) + + e.Init(&writer) + tick := func() { reg.Tick(func(tick histogram.Tick) { - _ = jsonEnc.Encode(tick.Snapshot()) + _ = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) }) } @@ -328,7 +335,8 @@ func registerBackup(r registry.Registry) { rows = 100 } dest := importBankData(ctx, rows, t, c) - tick, perfBuf := initBulkJobPerfArtifacts("backup/2TB", 2*time.Hour) + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + tick, perfBuf := initBulkJobPerfArtifacts(2*time.Hour, t, exporter) m := c.NewMonitor(ctx) m.Go(func(ctx context.Context) error { @@ -349,14 +357,8 @@ func registerBackup(r registry.Registry) { } tick() - // Upload the perf artifacts to any one of the nodes so that the test - // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(c.Node(1)), "mkdir -p "+filepath.Dir(dest)); err != nil { - t.L().ErrorfCtx(ctx, "failed to create perf dir: %+v", err) - } - if err := c.PutString(ctx, perfBuf.String(), dest, 0755, c.Node(1)); err != nil { - t.L().ErrorfCtx(ctx, "failed to upload perf artifacts to node: %s", err.Error()) + if _, err := roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, perfBuf, exporter, c.Node(1)); err != nil { + return err } return nil }) diff --git a/pkg/cmd/roachtest/tests/cdc_bench.go b/pkg/cmd/roachtest/tests/cdc_bench.go index 5246267d0901..b050b685a72c 100644 --- a/pkg/cmd/roachtest/tests/cdc_bench.go +++ b/pkg/cmd/roachtest/tests/cdc_bench.go @@ -9,9 +9,8 @@ import ( "bytes" "context" gosql "database/sql" - "encoding/json" "fmt" - "path/filepath" + "io" "strconv" "sync/atomic" "time" @@ -346,7 +345,7 @@ func runCDCBenchScan( t.L().Printf("changefeed completed in %s (scanned %s rows per second)", duration.Truncate(time.Second), humanize.Comma(rate)) - // Record scan rate to stats.json. + // Record scan rate to stats file. return writeCDCBenchStats(ctx, t, c, nCoord, "scan-rate", rate) }) @@ -547,10 +546,17 @@ func runCDCBenchWorkload( extra += ` --tolerate-errors` } t.L().Printf("running workload") + labels := map[string]string{ + "duration": duration.String(), + "concurrency": fmt.Sprintf("%d", concurrency), + "read_percent": fmt.Sprintf("%d", readPercent), + "insert_count": fmt.Sprintf("%d", insertCount), + } + err := c.RunE(ctx, option.WithNodes(nWorkload), fmt.Sprintf( - `./cockroach workload run kv --seed %d --histograms=%s/stats.json `+ + `./cockroach workload run kv --seed %d %s `+ `--concurrency %d --duration %s --write-seq R%d --read-percent %d %s {pgurl:%d-%d}`, - workloadSeed, t.PerfArtifactsDir(), concurrency, duration, insertCount, readPercent, extra, + workloadSeed, roachtestutil.GetWorkloadHistogramArgs(t, c, labels), concurrency, duration, insertCount, readPercent, extra, nData[0], nData[len(nData)-1])) if err != nil { return err @@ -627,7 +633,7 @@ func waitForChangefeed( } } -// writeCDCBenchStats writes a single perf metric into stats.json on the +// writeCDCBenchStats writes a single perf metric into stats file on the // given node, for graphing in roachperf. func writeCDCBenchStats( ctx context.Context, @@ -640,25 +646,24 @@ func writeCDCBenchStats( // The easiest way to record a precise metric for roachperf is to cast it as a // duration in seconds in the histogram's upper bound. valueS := time.Duration(value) * time.Second - reg := histogram.NewRegistry(valueS, histogram.MockWorkloadName) + + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + reg := histogram.NewRegistryWithExporter(valueS, histogram.MockWorkloadName, exporter) + bytesBuf := bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(bytesBuf) + writer := io.Writer(bytesBuf) + exporter.Init(&writer) var err error reg.GetHandle().Get(metric).Record(valueS) reg.Tick(func(tick histogram.Tick) { - err = jsonEnc.Encode(tick.Snapshot()) + err = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) }) if err != nil { return err } - // Upload the perf artifacts to the given node. - path := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(node), "mkdir -p "+filepath.Dir(path)); err != nil { - return err - } - if err := c.PutString(ctx, bytesBuf.String(), path, 0755, node); err != nil { + if _, err = roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, bytesBuf, exporter, node); err != nil { return err } return nil diff --git a/pkg/cmd/roachtest/tests/connection_latency.go b/pkg/cmd/roachtest/tests/connection_latency.go index 11395c0dc674..99cb84a3b4a0 100644 --- a/pkg/cmd/roachtest/tests/connection_latency.go +++ b/pkg/cmd/roachtest/tests/connection_latency.go @@ -71,10 +71,15 @@ func runConnectionLatencyTest( t.L().Printf("running workload in %q against urls:\n%s", locality, strings.Join(urls, "\n")) + labels := map[string]string{ + "duration": "30000", + "locality": locality, + } + workloadCmd := fmt.Sprintf( - `./workload run connectionlatency %s --secure --duration 30s --histograms=%s/stats.json --locality %s`, + `./workload run connectionlatency %s --secure --duration 30s %s --locality %s`, urlString, - t.PerfArtifactsDir(), + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), locality, ) err = c.RunE(ctx, option.WithNodes(loadNode), workloadCmd) diff --git a/pkg/cmd/roachtest/tests/decommissionbench.go b/pkg/cmd/roachtest/tests/decommissionbench.go index f3fc6da6f90f..d26b4a7c26b9 100644 --- a/pkg/cmd/roachtest/tests/decommissionbench.go +++ b/pkg/cmd/roachtest/tests/decommissionbench.go @@ -9,8 +9,8 @@ import ( "bytes" "context" gosql "database/sql" - "encoding/json" "fmt" + "io" "math" "os" "path/filepath" @@ -30,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/workload/histogram" + "github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" ) @@ -335,18 +336,28 @@ func fireAfter(ctx context.Context, duration time.Duration, fn func()) { // rather than utilizing the values recorded in the histogram, and can be // recorded in the perfBuf by utilizing the returned tickByName(name) function. func createDecommissionBenchPerfArtifacts( - opNames ...string, -) (reg *histogram.Registry, tickByName func(name string), perfBuf *bytes.Buffer) { + t test.Test, c cluster.Cluster, opNames ...string, +) ( + reg *histogram.Registry, + tickByName func(name string), + perfBuf *bytes.Buffer, + exporter exporter.Exporter, +) { + + exporter = roachtestutil.CreateWorkloadHistogramExporter(t, c) + // Create a histogram registry for recording multiple decommission metrics, // following the "bulk job" form of measuring performance. // See runDecommissionBench for more explanation. - reg = histogram.NewRegistry( + reg = histogram.NewRegistryWithExporter( defaultTimeout, histogram.MockWorkloadName, + exporter, ) perfBuf = bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(perfBuf) + writer := io.Writer(perfBuf) + exporter.Init(&writer) registeredOpNames := make(map[string]struct{}) for _, opName := range opNames { @@ -357,12 +368,12 @@ func createDecommissionBenchPerfArtifacts( tickByName = func(name string) { reg.Tick(func(tick histogram.Tick) { if _, ok := registeredOpNames[name]; ok && tick.Name == name { - _ = jsonEnc.Encode(tick.Snapshot()) + _ = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) } }) } - return reg, tickByName, perfBuf + return reg, tickByName, perfBuf, exporter } // setupDecommissionBench performs the initial cluster setup needed prior to @@ -488,23 +499,24 @@ func uploadPerfArtifacts( benchSpec decommissionBenchSpec, pinnedNode, workloadNode int, perfBuf *bytes.Buffer, + exporter exporter.Exporter, ) { // Store the perf artifacts on the pinned node so that the test // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(c.Node(pinnedNode)), "mkdir -p "+filepath.Dir(dest)); err != nil { - t.L().Errorf("failed to create perf dir: %+v", err) - } - if err := c.PutString(ctx, perfBuf.String(), dest, 0755, c.Node(pinnedNode)); err != nil { - t.L().Errorf("failed to upload perf artifacts to node: %s", err.Error()) + + destFileName, err := roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, perfBuf, exporter, c.Node(pinnedNode)) + if err != nil { + t.L().Errorf("error creating perf stats file: %s", err) + return } + dest := filepath.Join(t.PerfArtifactsDir(), destFileName) // Get the workload perf artifacts and move them to the pinned node, so that // they can be used to display the workload operation rates during decommission. if !benchSpec.noLoad { - workloadStatsSrc := filepath.Join(t.PerfArtifactsDir(), "stats.json") - localWorkloadStatsPath := filepath.Join(t.ArtifactsDir(), "workload_stats.json") - workloadStatsDest := filepath.Join(t.PerfArtifactsDir(), "workload_stats.json") + workloadStatsSrc := filepath.Join(t.PerfArtifactsDir(), destFileName) + localWorkloadStatsPath := filepath.Join(t.ArtifactsDir(), "workload_"+destFileName) + workloadStatsDest := filepath.Join(t.PerfArtifactsDir(), "workload_"+destFileName) if err := c.Get( ctx, t.L(), workloadStatsSrc, localWorkloadStatsPath, c.Node(workloadNode), ); err != nil { @@ -608,8 +620,8 @@ func runDecommissionBench( benchSpec.warehouses, ) workloadCmd := fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --max-rate=%d --duration=%s "+ - "--histograms=%s/stats.json --ramp=%s --tolerate-errors {pgurl:1-%d}", maxRate, benchSpec.warehouses, - testTimeout, t.PerfArtifactsDir(), rampDuration, benchSpec.nodes) + "%s --ramp=%s --tolerate-errors {pgurl:1-%d}", maxRate, benchSpec.warehouses, + testTimeout, roachtestutil.GetWorkloadHistogramArgs(t, c, nil), rampDuration, benchSpec.nodes) // In the case that we want to simulate high read amplification, we use kv0 // to run a write-heavy workload known to be difficult for compactions to keep @@ -617,8 +629,8 @@ func runDecommissionBench( if benchSpec.slowWrites { workloadCmd = fmt.Sprintf("./cockroach workload run kv --init --concurrency=%d --splits=1000 "+ "--read-percent=50 --min-block-bytes=8192 --max-block-bytes=8192 --duration=%s "+ - "--histograms=%s/stats.json --ramp=%s --tolerate-errors {pgurl:1-%d}", benchSpec.nodes*64, - testTimeout, t.PerfArtifactsDir(), rampDuration, benchSpec.nodes) + "%s --ramp=%s --tolerate-errors {pgurl:1-%d}", benchSpec.nodes*64, + testTimeout, roachtestutil.GetWorkloadHistogramArgs(t, c, nil), rampDuration, benchSpec.nodes) } setupDecommissionBench(ctx, t, c, benchSpec, pinnedNode, importCmd) @@ -656,7 +668,8 @@ func runDecommissionBench( // long-running metrics measured by the elapsed time between each tick, // as opposed to the histograms of workload operation latencies or other // recorded values that are typically output in a "tick" each second. - reg, tickByName, perfBuf := createDecommissionBenchPerfArtifacts( + reg, tickByName, perfBuf, exporter := createDecommissionBenchPerfArtifacts( + t, c, decommissionMetric, estimatedMetric, bytesUsedMetric, @@ -722,7 +735,7 @@ func runDecommissionBench( t.Fatal(err) } - uploadPerfArtifacts(ctx, t, c, benchSpec, pinnedNode, workloadNode, perfBuf) + uploadPerfArtifacts(ctx, t, c, benchSpec, pinnedNode, workloadNode, perfBuf, exporter) } // runDecommissionBenchLong initializes a cluster with TPCC and attempts to @@ -753,8 +766,8 @@ func runDecommissionBenchLong( benchSpec.warehouses, ) workloadCmd := fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --max-rate=%d --duration=%s "+ - "--histograms=%s/stats.json --ramp=%s --tolerate-errors {pgurl:1-%d}", maxRate, benchSpec.warehouses, - testTimeout, t.PerfArtifactsDir(), rampDuration, benchSpec.nodes) + "%s --ramp=%s --tolerate-errors {pgurl:1-%d}", maxRate, benchSpec.warehouses, + testTimeout, roachtestutil.GetWorkloadHistogramArgs(t, c, nil), rampDuration, benchSpec.nodes) setupDecommissionBench(ctx, t, c, benchSpec, pinnedNode, importCmd) @@ -790,7 +803,7 @@ func runDecommissionBenchLong( // long-running metrics measured by the elapsed time between each tick, // as opposed to the histograms of workload operation latencies or other // recorded values that are typically output in a "tick" each second. - reg, tickByName, perfBuf := createDecommissionBenchPerfArtifacts( + reg, tickByName, perfBuf, exporter := createDecommissionBenchPerfArtifacts(t, c, decommissionMetric, upreplicateMetric, bytesUsedMetric, ) @@ -848,7 +861,7 @@ func runDecommissionBenchLong( t.Fatal(err) } - uploadPerfArtifacts(ctx, t, c, benchSpec, pinnedNode, workloadNode, perfBuf) + uploadPerfArtifacts(ctx, t, c, benchSpec, pinnedNode, workloadNode, perfBuf, exporter) } // runSingleDecommission picks a random node and attempts to decommission that diff --git a/pkg/cmd/roachtest/tests/failover.go b/pkg/cmd/roachtest/tests/failover.go index 11b0170e7d60..b64b13ac3241 100644 --- a/pkg/cmd/roachtest/tests/failover.go +++ b/pkg/cmd/roachtest/tests/failover.go @@ -282,11 +282,11 @@ func runFailoverChaos(ctx context.Context, t test.Test, c cluster.Cluster, readO if readOnly { readPercent = 100 } + err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), fmt.Sprintf( `./cockroach workload run kv --read-percent %d --write-seq R%d `+ - `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-2}`, - readPercent, insertCount)) + `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors %s {pgurl:1-2}`, readPercent, insertCount, + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, insertCount, readPercent)))) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -460,7 +460,7 @@ func runFailoverPartialLeaseGateway(ctx context.Context, t test.Test, c cluster. cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:6-7}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:6-7}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -603,7 +603,7 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-3}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:1-3}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -725,7 +725,7 @@ func runFailoverPartialLeaseLiveness(ctx context.Context, t test.Test, c cluster cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-3}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:1-3}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -836,7 +836,7 @@ func runFailoverNonSystem( cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-3}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:1-3}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -949,7 +949,7 @@ func runFailoverLiveness( cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-3}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:1-3}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -1061,7 +1061,7 @@ func runFailoverSystemNonLiveness( cancelWorkload := m.GoWithCancel(func(ctx context.Context) error { err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), `./cockroach workload run kv --read-percent 50 `+ `--concurrency 256 --max-rate 2048 --timeout 1m --tolerate-errors `+ - `--histograms=`+t.PerfArtifactsDir()+`/stats.json {pgurl:1-3}`) + roachtestutil.GetWorkloadHistogramArgs(t, c, getKVLabels(256, 0, 50))+` {pgurl:1-3}`) if ctx.Err() != nil { return nil // test requested workload shutdown } @@ -1828,3 +1828,11 @@ func failoverRestartOpts() option.StartOpts { startOpts.RoachprodOpts.SkipInit = true return startOpts } + +func getKVLabels(concurrency int, insertCount int, readPercent int) map[string]string { + return map[string]string{ + "concurrency": fmt.Sprintf("%d", concurrency), + "insert_count": fmt.Sprintf("%d", insertCount), + "read_percent": fmt.Sprintf("%d", readPercent), + } +} diff --git a/pkg/cmd/roachtest/tests/import.go b/pkg/cmd/roachtest/tests/import.go index ad98b1310ffb..21ac4dc0fe15 100644 --- a/pkg/cmd/roachtest/tests/import.go +++ b/pkg/cmd/roachtest/tests/import.go @@ -9,7 +9,6 @@ import ( "context" gosql "database/sql" "fmt" - "path/filepath" "strings" "time" @@ -137,7 +136,8 @@ func registerImportTPCC(r registry.Registry) { m.Go(hc.Runner) } - tick, perfBuf := initBulkJobPerfArtifacts(testName, timeout) + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + tick, perfBuf := initBulkJobPerfArtifacts(timeout, t, exporter) workloadStr := `./cockroach workload fixtures import tpcc --warehouses=%d --csv-server='http://localhost:8081' {pgurl:1}` m.Go(func(ctx context.Context) error { defer dul.Done() @@ -148,7 +148,7 @@ func registerImportTPCC(r registry.Registry) { } else { defer hc.Done() } - cmd := fmt.Sprintf(workloadStr, warehouses) + cmd := fmt.Sprintf(workloadStr, 1) // Tick once before starting the import, and once after to capture the // total elapsed time. This is used by roachperf to compute and display // the average MB/sec per node. @@ -156,14 +156,8 @@ func registerImportTPCC(r registry.Registry) { c.Run(ctx, option.WithNodes(c.Node(1)), cmd) tick() - // Upload the perf artifacts to any one of the nodes so that the test - // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(c.Node(1)), "mkdir -p "+filepath.Dir(dest)); err != nil { - t.L().ErrorfCtx(ctx, "failed to create perf dir: %+v", err) - } - if err := c.PutString(ctx, perfBuf.String(), dest, 0755, c.Node(1)); err != nil { - t.L().ErrorfCtx(ctx, "failed to upload perf artifacts to node: %s", err.Error()) + if _, err := roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, perfBuf, exporter, c.Node(1)); err != nil { + return err } return nil }) @@ -238,7 +232,9 @@ func registerImportTPCH(r registry.Registry) { EncryptionSupport: registry.EncryptionMetamorphic, Leases: registry.MetamorphicLeases, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { - tick, perfBuf := initBulkJobPerfArtifacts(t.Name(), item.timeout) + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + + tick, perfBuf := initBulkJobPerfArtifacts(item.timeout, t, exporter) c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) conn := c.Conn(ctx, t.L(), 1) @@ -323,14 +319,8 @@ func registerImportTPCH(r registry.Registry) { } tick() - // Upload the perf artifacts to any one of the nodes so that the test - // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(c.Node(1)), "mkdir -p "+filepath.Dir(dest)); err != nil { - t.L().ErrorfCtx(ctx, "failed to create perf dir: %+v", err) - } - if err := c.PutString(ctx, perfBuf.String(), dest, 0755, c.Node(1)); err != nil { - t.L().ErrorfCtx(ctx, "failed to upload perf artifacts to node: %s", err.Error()) + if _, err = roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, perfBuf, exporter, c.Node(1)); err != nil { + return err } return nil }) diff --git a/pkg/cmd/roachtest/tests/indexes.go b/pkg/cmd/roachtest/tests/indexes.go index 967a43d95095..94422ff29b19 100644 --- a/pkg/cmd/roachtest/tests/indexes.go +++ b/pkg/cmd/roachtest/tests/indexes.go @@ -130,8 +130,11 @@ func registerNIndexes(r registry.Registry, secondaryIndexes int) { payload := " --payload=64" concurrency := roachtestutil.IfLocal(c, "", " --concurrency="+strconv.Itoa(conc)) duration := " --duration=" + roachtestutil.IfLocal(c, "10s", "10m") - runCmd := fmt.Sprintf("./workload run indexes --histograms="+t.PerfArtifactsDir()+"/stats.json"+ - payload+concurrency+duration+" {pgurl%s}", gatewayNodes) + labels := map[string]string{ + "concurrency": fmt.Sprintf("%d", conc), + "parallel_writes": fmt.Sprintf("%d", parallelWrites), + } + runCmd := fmt.Sprintf("./workload run indexes %s %s %s %s {pgurl%s}", roachtestutil.GetWorkloadHistogramArgs(t, c, labels), payload, concurrency, duration, gatewayNodes) c.Run(ctx, option.WithNodes(c.WorkloadNode()), runCmd) return nil }) diff --git a/pkg/cmd/roachtest/tests/kv.go b/pkg/cmd/roachtest/tests/kv.go index 6297e877a9aa..ab320d400702 100644 --- a/pkg/cmd/roachtest/tests/kv.go +++ b/pkg/cmd/roachtest/tests/kv.go @@ -147,7 +147,7 @@ func registerKV(r registry.Registry) { } else { readPercent = fmt.Sprintf(" --read-percent=%d", opts.readPercent) } - histograms := " --histograms=" + t.PerfArtifactsDir() + "/stats.json" + var batchSize string if opts.batchSize > 0 { batchSize = fmt.Sprintf(" --batch=%d", opts.batchSize) @@ -171,6 +171,8 @@ func registerKV(r registry.Registry) { if opts.sharedProcessMT { url = fmt.Sprintf(" {pgurl:1-%d:%s}", nodes, appTenantName) } + + histograms := " " + roachtestutil.GetWorkloadHistogramArgs(t, c, nil) cmd := fmt.Sprintf( "./cockroach workload run kv --tolerate-errors --init --user=%s --password=%s", install.DefaultUser, install.DefaultPassword, ) + diff --git a/pkg/cmd/roachtest/tests/large_schema_benchmark.go b/pkg/cmd/roachtest/tests/large_schema_benchmark.go index 94e8e45d536e..110d8fc51e52 100644 --- a/pkg/cmd/roachtest/tests/large_schema_benchmark.go +++ b/pkg/cmd/roachtest/tests/large_schema_benchmark.go @@ -196,14 +196,11 @@ func registerLargeSchemaBenchmark(r registry.Registry, numTables int, isMultiReg populateFileName := fmt.Sprintf("populate_%d", dbListType) mon.Go(func(ctx context.Context) error { waitEnabled := "--wait 0.0" - // Export histograms out for the roach perf dashboard - histograms := " --histograms=" + t.PerfArtifactsDir() + "/stats.json" var wlInstance []workloadInstance // Inactive databases will intentionally have wait time on // them and not include them in our histograms. if dbListType == inactiveDbListType { waitEnabled = "--wait 1.0" - histograms = "" // Use a different prometheus port for the inactive databases, // this will not be measured. wlInstance = append( @@ -224,7 +221,7 @@ func registerLargeSchemaBenchmark(r registry.Registry, numTables int, isMultiReg WorkloadInstances: wlInstance, Duration: time.Minute * 60, ExtraRunArgs: fmt.Sprintf("--db-list-file=%s --txn-preamble-file=%s --admin-urls=%q "+ - "--console-api-file=apiCalls --console-api-username=%q --console-api-password=%q --conns=%d --workers=%d %s %s", + "--console-api-file=apiCalls --console-api-username=%q --console-api-password=%q --conns=%d --workers=%d %s", populateFileName, "ormQueries.sql", strings.Join(webConsoleURLs, ","), @@ -232,8 +229,7 @@ func registerLargeSchemaBenchmark(r registry.Registry, numTables int, isMultiReg "roacher", numWorkers, numWorkers, - waitEnabled, - histograms), + waitEnabled), } runTPCC(ctx, t, t.L(), c, options) return nil diff --git a/pkg/cmd/roachtest/tests/ledger.go b/pkg/cmd/roachtest/tests/ledger.go index b118d21b026d..1851087e65b3 100644 --- a/pkg/cmd/roachtest/tests/ledger.go +++ b/pkg/cmd/roachtest/tests/ledger.go @@ -43,9 +43,14 @@ func registerLedger(r registry.Registry) { concurrency := roachtestutil.IfLocal(c, "", " --concurrency="+fmt.Sprint(nodes*32)) duration := " --duration=" + roachtestutil.IfLocal(c, "10s", "10m") + labels := map[string]string{ + "concurrency": fmt.Sprint(nodes * 32), + "duration": roachtestutil.IfLocal(c, "10000", "600000"), + } + // See https://github.com/cockroachdb/cockroach/issues/94062 for the --data-loader. - cmd := fmt.Sprintf("./workload run ledger --init --data-loader=INSERT --histograms="+t.PerfArtifactsDir()+"/stats.json"+ - concurrency+duration+" {pgurl%s}", gatewayNodes) + cmd := fmt.Sprintf("./workload run ledger --init --data-loader=INSERT %s %s %s {pgurl%s}", + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), concurrency, duration, gatewayNodes) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) diff --git a/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go b/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go index f28789066b4a..e4991936974d 100644 --- a/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go +++ b/pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go @@ -9,8 +9,8 @@ import ( "bytes" "context" gosql "database/sql" - "encoding/json" "fmt" + "io" "path" "path/filepath" "time" @@ -343,8 +343,10 @@ func runRecoverLossOfQuorum(ctx context.Context, t test.Test, c cluster.Cluster, } } - recordOutcome, buffer := initPerfCapture() - recordOutcome(testOutcome) + recordOutcome, buffer := initPerfCapture(t, c) + if err := recordOutcome(testOutcome); err != nil { + t.L().Errorf("failed to record outcome: %s", err) + } buffer.upload(ctx, t, c) if testOutcome == success { @@ -569,8 +571,10 @@ func runHalfOnlineRecoverLossOfQuorum( } } - recordOutcome, buffer := initPerfCapture() - recordOutcome(testOutcome) + recordOutcome, buffer := initPerfCapture(t, c) + if err = recordOutcome(testOutcome); err != nil { + t.L().Errorf("failed to record outcome: %s", err) + } buffer.upload(ctx, t, c) if testOutcome == success { @@ -619,7 +623,7 @@ type perfArtifact bytes.Buffer func (p *perfArtifact) upload(ctx context.Context, t test.Test, c cluster.Cluster) { // Upload the perf artifacts to any one of the nodes so that the test // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") + dest := filepath.Join(t.PerfArtifactsDir(), roachtestutil.GetBenchmarkMetricsFileName(t)) if err := c.RunE(ctx, option.WithNodes(c.Node(1)), "mkdir -p "+filepath.Dir(dest)); err != nil { t.L().Errorf("failed to create perf dir: %+v", err) } @@ -631,20 +635,25 @@ func (p *perfArtifact) upload(ctx context.Context, t test.Test, c cluster.Cluste // Register histogram and create a function that would record test outcome value. // Returned buffer contains all recorded ticks for the test and is updated // every time metric function is called. -func initPerfCapture() (func(testOutcomeMetric), *perfArtifact) { - reg := histogram.NewRegistry(time.Second*time.Duration(success), histogram.MockWorkloadName) +func initPerfCapture( + t test.Test, c cluster.Cluster, +) (func(testOutcomeMetric) error, *perfArtifact) { + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + reg := histogram.NewRegistryWithExporter(time.Second*time.Duration(success), histogram.MockWorkloadName, exporter) bytesBuf := bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(bytesBuf) + writer := io.Writer(bytesBuf) + exporter.Init(&writer) writeSnapshot := func() { reg.Tick(func(tick histogram.Tick) { - _ = jsonEnc.Encode(tick.Snapshot()) + _ = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) }) } - recordOutcome := func(metric testOutcomeMetric) { + recordOutcome := func(metric testOutcomeMetric) error { reg.GetHandle().Get("recovery_result").Record(time.Duration(metric) * time.Second) writeSnapshot() + return exporter.Close(nil) } // Capture start time for the test. diff --git a/pkg/cmd/roachtest/tests/perturbation/framework.go b/pkg/cmd/roachtest/tests/perturbation/framework.go index e7c5bbe4cb37..40bcfa8c4ce4 100644 --- a/pkg/cmd/roachtest/tests/perturbation/framework.go +++ b/pkg/cmd/roachtest/tests/perturbation/framework.go @@ -9,11 +9,10 @@ import ( "bytes" "context" gosql "database/sql" - "encoding/json" "fmt" + "io" "math" "math/rand" - "path/filepath" "reflect" "sort" "strings" @@ -483,7 +482,7 @@ func (v variations) runTest(ctx context.Context, t test.Test, c cluster.Cluster) t.Status("T5: validating results") require.NoError(t, roachtestutil.DownloadProfiles(ctx, c, t.L(), t.ArtifactsDir())) - require.NoError(t, v.writePerfArtifacts(ctx, t.Name(), t.PerfArtifactsDir(), baselineStats, perturbationStats, + require.NoError(t, v.writePerfArtifacts(ctx, t, c, baselineStats, perturbationStats, afterStats)) t.L().Printf("validating stats during the perturbation") @@ -682,40 +681,42 @@ func sortedStringKeys(m map[string]trackedStat) []string { return keys } -// writePerfArtifacts writes the stats.json in the right format to node 1 so it +// writePerfArtifacts writes the stats file in the right format to node 1 so it // can be picked up by roachperf. Currently it only writes the write stats since // there would be too many lines on the graph otherwise. func (v variations) writePerfArtifacts( ctx context.Context, - name string, - perfDir string, + t test.Test, + c cluster.Cluster, baseline, perturbation, recovery map[string]trackedStat, ) error { - reg := histogram.NewRegistry( + + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + + reg := histogram.NewRegistryWithExporter( time.Second, histogram.MockWorkloadName, + exporter, ) + + bytesBuf := bytes.NewBuffer([]byte{}) + writer := io.Writer(bytesBuf) + exporter.Init(&writer) + reg.GetHandle().Get("baseline").Record(baseline["write"].score) reg.GetHandle().Get("perturbation").Record(perturbation["write"].score) reg.GetHandle().Get("recovery").Record(recovery["write"].score) - bytesBuf := bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(bytesBuf) var err error reg.Tick(func(tick histogram.Tick) { - err = jsonEnc.Encode(tick.Snapshot()) + err = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) }) if err != nil { return err } node := v.Node(1) - // Upload the perf artifacts to the given node. - if err := v.RunE(ctx, option.WithNodes(node), "mkdir -p "+perfDir); err != nil { - return err - } - path := filepath.Join(perfDir, "stats.json") - if err := v.PutString(ctx, bytesBuf.String(), path, 0755, node); err != nil { + if _, err := roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, bytesBuf, exporter, node); err != nil { return err } return nil diff --git a/pkg/cmd/roachtest/tests/queue.go b/pkg/cmd/roachtest/tests/queue.go index 930094e49d04..3f64b1b1287c 100644 --- a/pkg/cmd/roachtest/tests/queue.go +++ b/pkg/cmd/roachtest/tests/queue.go @@ -54,13 +54,18 @@ func runQueue(ctx context.Context, t test.Test, c cluster.Cluster) { if initTables { init = " --init" } + labels := map[string]string{ + "batch": "100", + "concurrency": roachtestutil.IfLocal(c, "", fmt.Sprint(dbNodeCount*64)), + "duration": duration, + } cmd := fmt.Sprintf( - "./workload run queue --histograms="+t.PerfArtifactsDir()+"/stats.json"+ - init+ - concurrency+ - duration+ - batch+ - " {pgurl%s}", + "./workload run queue %s %s %s %s %s {pgurl%s}", + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), + init, + concurrency, + duration, + batch, c.CRDBNodes(), ) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index 0c4daa4f1ece..dba8578f4e95 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -8,11 +8,10 @@ package tests import ( "bytes" "context" - "encoding/json" "fmt" + "io" "math/rand" "net/http" - "path/filepath" "strings" "time" @@ -1124,30 +1123,31 @@ func exportToRoachperf( ctx context.Context, t test.Test, c cluster.Cluster, testName string, metric int64, ) { + exporter := roachtestutil.CreateWorkloadHistogramExporter(t, c) + // The easiest way to record a precise metric for roachperf is to caste it as a duration, // in seconds in the histogram's upper bound. - reg := histogram.NewRegistry( - time.Duration(metric)*time.Second, - histogram.MockWorkloadName, - ) + reg := histogram.NewRegistryWithExporter(time.Duration(metric)*time.Second, histogram.MockWorkloadName, exporter) + bytesBuf := bytes.NewBuffer([]byte{}) - jsonEnc := json.NewEncoder(bytesBuf) + writer := io.Writer(bytesBuf) + exporter.Init(&writer) + var err error // Ensure the histogram contains the name of the roachtest reg.GetHandle().Get(testName) // Serialize the histogram into the buffer reg.Tick(func(tick histogram.Tick) { - _ = jsonEnc.Encode(tick.Snapshot()) + err = tick.Exporter.SnapshotAndWrite(tick.Hist, tick.Now, tick.Elapsed, &tick.Name) }) - // Upload the perf artifacts to any one of the nodes so that the test - // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, option.WithNodes(c.Node(1)), "mkdir -p "+filepath.Dir(dest)); err != nil { - t.L().ErrorfCtx(ctx, "failed to create perf dir: %+v", err) + + if err != nil { + return } - if err := c.PutString(ctx, bytesBuf.String(), dest, 0755, c.Node(1)); err != nil { - t.L().ErrorfCtx(ctx, "failed to upload perf artifacts to node: %s", err.Error()) + if _, err = roachtestutil.CreateStatsFileInClusterFromExporter(ctx, t, c, bytesBuf, exporter, c.Node(1)); err != nil { + t.L().Errorf("error creating stats file: %s", err) + return } } diff --git a/pkg/cmd/roachtest/tests/schemachange_random_load.go b/pkg/cmd/roachtest/tests/schemachange_random_load.go index c784e0e9506b..71616f43e8e5 100644 --- a/pkg/cmd/roachtest/tests/schemachange_random_load.go +++ b/pkg/cmd/roachtest/tests/schemachange_random_load.go @@ -14,6 +14,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -109,13 +110,18 @@ func runSchemaChangeRandomLoad( runCmd := []string{ "./workload run schemachange --verbose=1", "--tolerate-errors=false", - // Save the histograms so that they can be reported to https://roachperf.crdb.dev/. - " --histograms=" + t.PerfArtifactsDir() + "/stats.json", fmt.Sprintf("--max-ops %d", maxOps), fmt.Sprintf("--concurrency %d", concurrency), fmt.Sprintf("--txn-log %s", filepath.Join(storeDirectory, txnLogFile)), fmt.Sprintf("{pgurl%s}", loadNode), } + + extraLabels := map[string]string{ + "concurrency": fmt.Sprintf("%d", concurrency), + "max-ops": fmt.Sprintf("%d", maxOps), + } + + runCmd = append(runCmd, roachtestutil.GetWorkloadHistogramArgs(t, c, extraLabels)) t.Status("running schemachange workload") err = c.RunE(ctx, option.WithNodes(loadNode), runCmd...) if err != nil { diff --git a/pkg/cmd/roachtest/tests/tpcc.go b/pkg/cmd/roachtest/tests/tpcc.go index 49980a2572a3..802806ce4912 100644 --- a/pkg/cmd/roachtest/tests/tpcc.go +++ b/pkg/cmd/roachtest/tests/tpcc.go @@ -9,6 +9,7 @@ import ( "context" gosql "database/sql" "fmt" + "maps" "math" "math/rand" "os" @@ -17,6 +18,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/clusterstats" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" @@ -279,7 +281,7 @@ func runTPCC( // Make a copy of i for the goroutine. i := i m.Go(func(ctx context.Context) error { - // Only prefix stats.json with workload_i_ if we have multiple workloads, + // Only prefix stats file with workload_i_ if we have multiple workloads, // in case other processes relied on previous behavior. var statsPrefix string if len(workloadInstances) > 1 { @@ -288,11 +290,18 @@ func runTPCC( l.Printf("running tpcc worker=%d warehouses=%d ramp=%s duration=%s on %s (<%s)", i, opts.Warehouses, rampDur, opts.Duration, pgURLs[i], time.Minute) - histogramsPath := fmt.Sprintf("%s/%sstats.json", t.PerfArtifactsDir(), statsPrefix) + fileName := roachtestutil.GetBenchmarkMetricsFileName(t) + histogramsPath := fmt.Sprintf("%s/%s%s", t.PerfArtifactsDir(), statsPrefix, fileName) + var labelsMap map[string]string + if t.ExportOpenmetrics() { + labelsMap = getTpccLabels(opts.Warehouses, rampDur, opts.Duration, map[string]string{"database": opts.DB}) + } cmd := roachtestutil.NewCommand("%s workload run %s", test.DefaultCockroachPath, opts.getWorkloadCmd()). MaybeFlag(opts.DB != "", "db", opts.DB). Flag("warehouses", opts.Warehouses). MaybeFlag(!opts.DisableHistogram, "histograms", histogramsPath). + MaybeFlag(t.ExportOpenmetrics(), "histogram-export-format", "openmetrics"). + MaybeFlag(t.ExportOpenmetrics(), "openmetrics-labels", clusterstats.GetOpenmetricsLabelString(t, c, labelsMap)). Flag("ramp", rampDur). Flag("duration", opts.Duration). Flag("prometheus-port", workloadInstances[i].prometheusPort). @@ -484,11 +493,18 @@ func runTPCCMixedHeadroom(ctx context.Context, t test.Test, c cluster.Cluster) { workloadDur = 100 * time.Minute } } + histogramsPath := fmt.Sprintf("%s/%s", t.PerfArtifactsDir(), roachtestutil.GetBenchmarkMetricsFileName(t)) + var labelsMap map[string]string + if t.ExportOpenmetrics() { + labelsMap = getTpccLabels(headroomWarehouses, rampDur, workloadDur/time.Millisecond, nil) + } cmd := roachtestutil.NewCommand("./cockroach workload run tpcc"). Arg("{pgurl%s}", c.CRDBNodes()). Flag("duration", workloadDur). Flag("warehouses", headroomWarehouses). - Flag("histograms", t.PerfArtifactsDir()+"/stats.json"). + Flag("histograms", histogramsPath). + MaybeFlag(t.ExportOpenmetrics(), "histogram-export-format", "openmetrics"). + MaybeFlag(t.ExportOpenmetrics(), "openmetrics-labels", clusterstats.GetOpenmetricsLabelString(t, c, labelsMap)). Flag("ramp", rampDur). Flag("prometheus-port", 2112). Flag("pprofport", workloadPProfStartPort). @@ -1667,15 +1683,18 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen extraFlags += " --method=simple" } t.Status(fmt.Sprintf("running benchmark, warehouses=%d", warehouses)) - histogramsPath := fmt.Sprintf("%s/warehouses=%d/stats.json", t.PerfArtifactsDir(), warehouses) + histogramsPath := fmt.Sprintf("%s/warehouses=%d/%s", t.PerfArtifactsDir(), warehouses, roachtestutil.GetBenchmarkMetricsFileName(t)) var tenantSuffix string if b.SharedProcessMT { tenantSuffix = fmt.Sprintf(":%s", appTenantName) } + + labels := getTpccLabels(warehouses, rampDur, loadDur, nil) + cmd := fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --active-warehouses=%d "+ - "--tolerate-errors --ramp=%s --duration=%s%s --histograms=%s {pgurl%s%s}", + "--tolerate-errors --ramp=%s --duration=%s%s %s {pgurl%s%s}", b.LoadWarehouses(c.Cloud()), warehouses, rampDur, - loadDur, extraFlags, histogramsPath, sqlGateways, tenantSuffix) + loadDur, extraFlags, roachtestutil.GetWorkloadHistogramArgs(t, c, labels), sqlGateways, tenantSuffix) err := c.RunE(ctx, option.WithNodes(group.LoadNodes), cmd) loadDone <- timeutil.Now() if err != nil { @@ -1683,23 +1702,26 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen // count. return errors.Wrapf(err, "error running tpcc load generator") } - roachtestHistogramsPath := filepath.Join(resultsDir, fmt.Sprintf("%d.%d-stats.json", warehouses, groupIdx)) - if err := c.Get( - ctx, t.L(), histogramsPath, roachtestHistogramsPath, group.LoadNodes, - ); err != nil { - // NB: this will let the line search continue. The reason we do this - // is because it's conceivable that we made it here, but a VM just - // froze up on us. The next search iteration will handle this state. - return err - } - snapshots, err := histogram.DecodeSnapshots(roachtestHistogramsPath) - if err != nil { - // If we got this far, and can't decode data, it's not a case of - // overload but something that deserves failing the whole test. - t.Fatal(err) + if !t.ExportOpenmetrics() { + roachtestHistogramsPath := filepath.Join(resultsDir, fmt.Sprintf("%d.%d-stats.json", warehouses, groupIdx)) + if err := c.Get( + ctx, t.L(), histogramsPath, roachtestHistogramsPath, group.LoadNodes, + ); err != nil { + // NB: this will let the line search continue. The reason we do this + // is because it's conceivable that we made it here, but a VM just + // froze up on us. The next search iteration will handle this state. + return err + } + snapshots, err := histogram.DecodeSnapshots(roachtestHistogramsPath) + if err != nil { + // If we got this far, and can't decode data, it's not a case of + // overload but something that deserves failing the whole test. + t.Fatal(err) + } + result := tpcc.NewResultWithSnapshots(warehouses, 0, snapshots) + resultChan <- result + return nil } - result := tpcc.NewResultWithSnapshots(warehouses, 0, snapshots) - resultChan <- result return nil }) } @@ -1832,3 +1854,19 @@ func setupPrometheusForRoachtest( } return cfg, cleanupFunc } + +func getTpccLabels( + warehouses int, rampDur time.Duration, duration time.Duration, extraLabels map[string]string, +) map[string]string { + labels := map[string]string{ + "warehouses": fmt.Sprintf("%d", warehouses), + "duration": duration.String(), + "ramp": rampDur.String(), + } + + if extraLabels != nil { + maps.Copy(labels, extraLabels) + } + + return labels +} diff --git a/pkg/cmd/roachtest/tests/tpce.go b/pkg/cmd/roachtest/tests/tpce.go index 6984127cec0c..9bc485bc700b 100644 --- a/pkg/cmd/roachtest/tests/tpce.go +++ b/pkg/cmd/roachtest/tests/tpce.go @@ -7,6 +7,7 @@ package tests import ( "bufio" + "bytes" "context" "encoding/json" "fmt" @@ -15,6 +16,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/clusterstats" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" @@ -23,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/roachprod/prometheus" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" ) @@ -254,7 +257,7 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio } if opts.exportMetrics { - return exportTPCEResults(t, result.Stdout) + return exportTPCEResults(t, c, result.Stdout) } return nil }) @@ -327,7 +330,7 @@ type tpceMetrics struct { // // The Measured Throughput is computed as the total number of Valid Trade-Result Transactions // within the Measurement Interval divided by the duration of the Measurement Interval in seconds. -func exportTPCEResults(t test.Test, result string) error { +func exportTPCEResults(t test.Test, c cluster.Cluster, result string) error { // Filter out everything but the TradeResult transaction metrics. // // Example output of TPCE: @@ -357,7 +360,20 @@ func exportTPCEResults(t test.Test, result string) error { P99Latency: removeUnits(fields[11]), PMaxLatency: removeUnits(fields[12]), } - metricBytes, err := json.Marshal(metrics) + + var metricBytes []byte + var err error + fileName := roachtestutil.GetBenchmarkMetricsFileName(t) + if t.ExportOpenmetrics() { + labels := map[string]string{ + "workload": "tpce", + } + labelString := clusterstats.GetOpenmetricsLabelString(t, c, labels) + metricBytes = getOpenMetrics(metrics, fields[5], labelString) + } else { + metricBytes, err = json.Marshal(metrics) + } + if err != nil { return err } @@ -369,7 +385,26 @@ func exportTPCEResults(t test.Test, result string) error { return err } - return os.WriteFile(fmt.Sprintf("%s/stats.json", perfDir), metricBytes, 0666) + return os.WriteFile(fmt.Sprintf("%s/%s", perfDir, fileName), metricBytes, 0666) } return errors.Errorf("exportTPCEResults: found no lines starting with TradeResult") } + +func getOpenMetrics(metrics tpceMetrics, countOfLatencies string, labelString string) []byte { + + var buffer bytes.Buffer + now := timeutil.Now().Unix() + + buffer.WriteString("# TYPE tpce_latency summary\n") + buffer.WriteString("# HELP tpce_latency Latency metrics for TPC-E transactions\n") + buffer.WriteString(fmt.Sprintf("tpce_latency{%s,quantile=\"0.5\"} %s %d\n", labelString, metrics.P50Latency, now)) + buffer.WriteString(fmt.Sprintf("tpce_latency{%s,quantile=\"0.9\"} %s %d\n", labelString, metrics.P90Latency, now)) + buffer.WriteString(fmt.Sprintf("tpce_latency{%s,quantile=\"0.99\"} %s %d\n", labelString, metrics.P99Latency, now)) + buffer.WriteString(fmt.Sprintf("tpce_latency{%s,quantile=\"1.0\"} %s %d\n", labelString, metrics.PMaxLatency, now)) + buffer.WriteString(fmt.Sprintf("tpce_latency_sum{%s} %d %d\n", labelString, 0, now)) + buffer.WriteString(fmt.Sprintf("tpce_latency_count{%s} %s %d\n", labelString, countOfLatencies, now)) + buffer.WriteString("# EOF") + + metricsBytes := buffer.Bytes() + return metricsBytes +} diff --git a/pkg/cmd/roachtest/tests/tpchbench.go b/pkg/cmd/roachtest/tests/tpchbench.go index fa9de0a6580f..17a7b18c4b2e 100644 --- a/pkg/cmd/roachtest/tests/tpchbench.go +++ b/pkg/cmd/roachtest/tests/tpchbench.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" @@ -78,15 +79,20 @@ func runTPCHBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpchBen // run b.numRunsPerQuery number of times. maxOps := b.numRunsPerQuery * numQueries + labels := map[string]string{ + "max_ops": fmt.Sprintf("%d", maxOps), + "num_queries": fmt.Sprintf("%d", numQueries), + } + // Run with only one worker to get best-case single-query performance. cmd := fmt.Sprintf( "./workload run querybench --db=tpch --concurrency=1 --query-file=%s "+ - "--num-runs=%d --max-ops=%d {pgurl%s} "+ - "--histograms="+t.PerfArtifactsDir()+"/stats.json --histograms-max-latency=%s", + "--num-runs=%d --max-ops=%d {pgurl%s} %s --histograms-max-latency=%s", filename, b.numRunsPerQuery, maxOps, c.CRDBNodes(), + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), b.maxLatency.String(), ) if err := c.RunE(ctx, option.WithNodes(c.WorkloadNode()), cmd); err != nil { diff --git a/pkg/cmd/roachtest/tests/ycsb.go b/pkg/cmd/roachtest/tests/ycsb.go index b3e2eac928a3..a8fe5e33ea0f 100644 --- a/pkg/cmd/roachtest/tests/ycsb.go +++ b/pkg/cmd/roachtest/tests/ycsb.go @@ -88,11 +88,17 @@ func registerYCSB(r registry.Registry) { defaultDuration := roachtestutil.IfLocal(c, "10s", "30m") args += roachtestutil.GetEnvWorkloadDurationValueOrDefault(defaultDuration) + + labels := map[string]string{ + "workload_ycsb_type": wl, + "concurrency": fmt.Sprintf("%d", conc), + "cpu": fmt.Sprintf("%d", cpus), + } + cmd := fmt.Sprintf( "./cockroach workload run ycsb --init --insert-count=1000000 --workload=%s --concurrency=%d"+ - " --splits=%d --histograms="+t.PerfArtifactsDir()+"/stats.json"+args+ - " {pgurl%s}", - wl, conc, len(c.CRDBNodes()), c.CRDBNodes()) + " --splits=%d %s %s {pgurl%s}", wl, conc, len(c.CRDBNodes()), + roachtestutil.GetWorkloadHistogramArgs(t, c, labels), args, c.CRDBNodes()) c.Run(ctx, option.WithNodes(c.WorkloadNode()), cmd) return nil }) diff --git a/pkg/workload/cli/run.go b/pkg/workload/cli/run.go index 9ea322e71627..309684df5172 100644 --- a/pkg/workload/cli/run.go +++ b/pkg/workload/cli/run.go @@ -432,7 +432,7 @@ func runRun(gen workload.Generator, urls []string, dbName string) error { publisher = histogram.CreateUdpPublisher(*individualOperationReceiverAddr) } - metricsExporter, file, err := maybeInitAndCreateExporter() + metricsExporter, file, err := maybeInitAndCreateExporter(gen) if err != nil { return errors.Wrap(err, "error creating metrics exporter") } @@ -666,7 +666,7 @@ func maybeLogRandomSeed(ctx context.Context, gen workload.Generator) { } } -func maybeInitAndCreateExporter() (exporter.Exporter, *os.File, error) { +func maybeInitAndCreateExporter(gen workload.Generator) (exporter.Exporter, *os.File, error) { if *histograms == "" { return nil, nil, nil } @@ -687,6 +687,9 @@ func maybeInitAndCreateExporter() (exporter.Exporter, *os.File, error) { } labels[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) } + + // Append workload generator name as a tag + labels["workload"] = gen.Meta().Name openMetricsExporter := exporter.OpenMetricsExporter{} openMetricsExporter.SetLabels(&labels) metricsExporter = &openMetricsExporter diff --git a/pkg/workload/histogram/exporter/BUILD.bazel b/pkg/workload/histogram/exporter/BUILD.bazel index 1373cc5c1670..20f9e3e636fd 100644 --- a/pkg/workload/histogram/exporter/BUILD.bazel +++ b/pkg/workload/histogram/exporter/BUILD.bazel @@ -11,6 +11,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter", visibility = ["//visibility:public"], deps = [ + "//pkg/cmd/roachprod-microbench/util", "@com_github_codahale_hdrhistogram//:hdrhistogram", "@com_github_gogo_protobuf//proto", "@com_github_prometheus_client_model//go", diff --git a/pkg/workload/histogram/exporter/openmetrics_exporter.go b/pkg/workload/histogram/exporter/openmetrics_exporter.go index 389240b6d2e2..ec0b70c50c23 100644 --- a/pkg/workload/histogram/exporter/openmetrics_exporter.go +++ b/pkg/workload/histogram/exporter/openmetrics_exporter.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/util" "github.com/codahale/hdrhistogram" "github.com/gogo/protobuf/proto" prom "github.com/prometheus/client_model/go" @@ -94,6 +95,7 @@ func (o *OpenMetricsExporter) Close(f func() error) error { func (o *OpenMetricsExporter) emitGaugeMetric( name string, value float64, timestamp time.Time, ) error { + name = util.SanitizeMetricName(name) gaugeMetric := prom.MetricFamily{ Name: &name, Help: nil, @@ -139,8 +141,10 @@ func (o *OpenMetricsExporter) SetLabels(labels *map[string]string) { var labelValues []*prom.LabelPair for label, value := range *labels { - labelName := sanitizeOpenMetricsLabels(label) - labelValue := value + labelName := util.SanitizeKey(label) + + // In case the label value already has surrounding quotes, we should trim them + labelValue := util.SanitizeValue(strings.Trim(value, "\"")) labelPair := &prom.LabelPair{ Name: &labelName, Value: &labelValue, diff --git a/pkg/workload/histogram/exporter/util.go b/pkg/workload/histogram/exporter/util.go index 1dc33925fde0..6c4b756460b0 100644 --- a/pkg/workload/histogram/exporter/util.go +++ b/pkg/workload/histogram/exporter/util.go @@ -6,26 +6,18 @@ package exporter import ( - "regexp" "time" + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/util" "github.com/codahale/hdrhistogram" "github.com/gogo/protobuf/proto" prom "github.com/prometheus/client_model/go" ) var ( - invalidCharRegex = regexp.MustCompile(`[^a-zA-Z0-9_]`) - invalidFirstCharRegex = regexp.MustCompile(`^[^a-zA-Z_]`) - summaryQuantiles = []float64{50, 95, 99, 100} + summaryQuantiles = []float64{50, 95, 99, 100} ) -func sanitizeOpenMetricsLabels(input string) string { - sanitized := invalidCharRegex.ReplaceAllString(input, "_") - sanitized = invalidFirstCharRegex.ReplaceAllString(sanitized, "_") - return sanitized -} - // ConvertHdrHistogramToPrometheusMetricFamily converts a Hdr histogram into MetricFamily which is used // by expfmt.MetricFamilyToOpenMetrics to export openmetrics func ConvertHdrHistogramToPrometheusMetricFamily( @@ -52,8 +44,9 @@ func ConvertHdrHistogramToPrometheusMetricFamily( summary.Quantile = valueQuantiles summary.SampleCount = &totalCount timestampMs := proto.Int64(start.UTC().UnixMilli()) + sanitizedName := util.SanitizeMetricName(*name) return &prom.MetricFamily{ - Name: name, + Name: &sanitizedName, Type: prom.MetricType_SUMMARY.Enum(), Metric: []*prom.Metric{{ Summary: summary,