Skip to content

Commit

Permalink
tf: show prometheus/SD diff on failure instead of dump (istio#37520)
Browse files Browse the repository at this point in the history
* tf: show prometheus diff on failure instead of dump

Example output:
```
   stats.go:130: query "istio_requests_total" returned 2 series, but none matched our query exactly.
    stats.go:130: Series 0
    stats.go:130:   for label "reporter", wanted "destination" but got "source"
    stats.go:130: Series 1
    stats.go:130:   for label "reporter", wanted "destination" but got "source"
    stats.go:130:   for label "destination_app", wanted "server" but got "unknown"
    stats.go:130:   for label "destination_service", wanted "server.echo.svc.cluster.local" but got "server-no-sidecar.echo.svc.cluster.local"
    stats.go:130:   for label "destination_version", wanted "v1" but got "unknown"
    stats.go:130:   for label "destination_service_name", wanted "server" but got "server-no-sidecar"
```

* fix

* Fix outbound part

* tf: show smart diff of SD failures instead of dumping pages of text

* Fix fail early

* add names and sort

* always use fake GCE
  • Loading branch information
howardjohn authored Feb 25, 2022
1 parent bb63751 commit f212b68
Show file tree
Hide file tree
Showing 19 changed files with 518 additions and 219 deletions.
100 changes: 45 additions & 55 deletions pkg/test/framework/components/prometheus/kube.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"

Expand All @@ -29,15 +30,12 @@ import (
kubeApiMeta "k8s.io/apimachinery/pkg/apis/meta/v1"

istioKube "istio.io/istio/pkg/kube"
"istio.io/istio/pkg/test"
"istio.io/istio/pkg/test/env"
"istio.io/istio/pkg/test/framework/components/cluster"
"istio.io/istio/pkg/test/framework/components/istio"
"istio.io/istio/pkg/test/framework/resource"
testKube "istio.io/istio/pkg/test/kube"
"istio.io/istio/pkg/test/scopes"
"istio.io/istio/pkg/test/util/retry"
"istio.io/istio/pkg/test/util/tmpl"
)

const (
Expand All @@ -46,9 +44,6 @@ const (
)

var (
retryTimeout = retry.Timeout(time.Second * 120)
retryDelay = retry.Delay(time.Second * 1)

_ Instance = &kubeComponent{}
_ io.Closer = &kubeComponent{}
)
Expand Down Expand Up @@ -81,7 +76,7 @@ func installPrometheus(ctx resource.Context, ns string) error {
if err := ctx.ConfigKube().ApplyYAMLNoCleanup(ns, yaml); err != nil {
return err
}
ctx.Cleanup(func() {
ctx.ConditionalCleanup(func() {
_ = ctx.ConfigKube().DeleteYAML(ns, yaml)
})
return nil
Expand Down Expand Up @@ -156,54 +151,32 @@ func (c *kubeComponent) APIForCluster(cluster cluster.Cluster) prometheusApiV1.A
return c.api[cluster.Name()]
}

func (c *kubeComponent) Query(cluster cluster.Cluster, format string) (model.Value, error) {
value, err := retry.UntilComplete(func() (interface{}, bool, error) {
var err error
query, err := tmpl.Evaluate(format, map[string]string{})
if err != nil {
return nil, true, err
}

scopes.Framework.Debugf("Query running: %q", query)

v, _, err := c.api[cluster.Name()].Query(context.Background(), query, time.Now())
if err != nil {
return nil, false, fmt.Errorf("error querying Prometheus: %v", err)
}
scopes.Framework.Debugf("Query received: %v", v)
func (c *kubeComponent) Query(cluster cluster.Cluster, query Query) (model.Value, error) {
scopes.Framework.Debugf("Query running: %q", query)

switch v.Type() {
case model.ValScalar, model.ValString:
return v, true, nil
v, _, err := c.api[cluster.Name()].Query(context.Background(), query.String(), time.Now())
if err != nil {
return nil, fmt.Errorf("error querying Prometheus: %v", err)
}
scopes.Framework.Debugf("Query received: %v", v)

case model.ValVector:
value := v.(model.Vector)
if len(value) == 0 {
return nil, false, fmt.Errorf("value not found (query: %q)", query)
}
return v, true, nil
switch v.Type() {
case model.ValScalar, model.ValString:
return v, nil

default:
return nil, true, fmt.Errorf("unhandled value type: %v", v.Type())
case model.ValVector:
value := v.(model.Vector)
if len(value) == 0 {
return nil, fmt.Errorf("value not found (query: %v)", query)
}
}, retryTimeout, retryDelay)

var v model.Value
if value != nil {
v = value.(model.Value)
}
return v, err
}
return v, nil

func (c *kubeComponent) QueryOrFail(t test.Failer, cluster cluster.Cluster, format string) model.Value {
val, err := c.Query(cluster, format)
if err != nil {
t.Fatal(err)
default:
return nil, fmt.Errorf("unhandled value type: %v", v.Type())
}
return val
}

func (c *kubeComponent) QuerySum(cluster cluster.Cluster, query string) (float64, error) {
func (c *kubeComponent) QuerySum(cluster cluster.Cluster, query Query) (float64, error) {
val, err := c.Query(cluster, query)
if err != nil {
return 0, err
Expand All @@ -215,14 +188,6 @@ func (c *kubeComponent) QuerySum(cluster cluster.Cluster, query string) (float64
return got, nil
}

func (c *kubeComponent) QuerySumOrFail(t test.Failer, cluster cluster.Cluster, query string) float64 {
v, err := c.QuerySum(cluster, query)
if err != nil {
t.Fatal("failed QuerySum: %v", err)
}
return v
}

func Sum(val model.Value) (float64, error) {
if val.Type() != model.ValVector {
return 0, fmt.Errorf("value not a model.Vector; was %s", val.Type().String())
Expand All @@ -248,3 +213,28 @@ func (c *kubeComponent) Close() error {
}
return nil
}

// Query describes a Prometheus selector: a metric name, a set of label
// equality matchers, and an optional function wrapped around the selector.
type Query struct {
	// Metric is the metric name written before the label matchers.
	Metric string
	// Aggregation, when non-empty, is rendered as Aggregation(selector).
	Aggregation string
	// Labels maps label names to the exact values they must match.
	Labels map[string]string
}

// String renders the query as text in the form metric{k1="v1",k2="v2",},
// wrapped as Aggregation(...) when Aggregation is set.
//
// Labels are written in sorted key order so the same Query always yields the
// same string regardless of map iteration order. A Query without labels
// renders as metric{}.
func (q Query) String() string {
	keys := make([]string, 0, len(q.Labels))
	for k := range q.Labels {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var b strings.Builder
	b.WriteString(q.Metric)
	b.WriteByte('{')
	for _, k := range keys {
		// %q quotes and escapes the value. Each matcher keeps its trailing
		// comma so the rendered text is unchanged from the previous output.
		fmt.Fprintf(&b, `%s=%q,`, k, q.Labels[k])
	}
	b.WriteByte('}')

	if q.Aggregation == "" {
		return b.String()
	}
	return q.Aggregation + "(" + b.String() + ")"
}
6 changes: 2 additions & 4 deletions pkg/test/framework/components/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,10 @@ type Instance interface {
APIForCluster(cluster cluster.Cluster) v1.API

// Query runs the provided query against the given cluster
Query(cluster cluster.Cluster, query string) (prom.Value, error)
QueryOrFail(t test.Failer, cluster cluster.Cluster, query string) prom.Value
Query(cluster cluster.Cluster, query Query) (prom.Value, error)

// QuerySum is a helper around Query to compute the sum
QuerySum(cluster cluster.Cluster, query string) (float64, error)
QuerySumOrFail(t test.Failer, cluster cluster.Cluster, query string) float64
QuerySum(cluster cluster.Cluster, query Query) (float64, error)
}

type Config struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package sdsegress

import (
"context"
"fmt"
"net/http"
"testing"
"time"
Expand All @@ -33,6 +32,7 @@ import (
"istio.io/istio/pkg/test/framework/components/namespace"
"istio.io/istio/pkg/test/framework/components/prometheus"
"istio.io/istio/pkg/test/util/file"
"istio.io/istio/pkg/test/util/retry"
"istio.io/istio/tests/integration/security/util"
)

Expand Down Expand Up @@ -140,10 +140,17 @@ func applySetupConfig(ctx framework.TestContext, ns namespace.Instance) {
}
}

func getEgressRequestCountOrFail(ctx framework.TestContext, ns namespace.Instance, prom prometheus.Instance) int {
query := fmt.Sprintf("istio_requests_total{destination_app=\"%s\",source_workload_namespace=\"%s\"}",
egressName, ns.Name())
ctx.Helper()

return int(prom.QuerySumOrFail(ctx, ctx.Clusters().Default(), query))
// getEgressRequestCountOrFail queries the default cluster's Prometheus for the
// sum of istio_requests_total series whose destination_app is egressName and
// whose source_workload_namespace is ns.Name(), and returns it converted to int.
func getEgressRequestCountOrFail(t framework.TestContext, ns namespace.Instance, prom prometheus.Instance) int {
t.Helper()

var res int
// The query runs inside retry.UntilSuccessOrFail; presumably it is re-run
// until the callback returns a nil error and fails t otherwise — confirm
// against the retry package.
retry.UntilSuccessOrFail(t, func() error {
r, err := prom.QuerySum(t.Clusters().Default(), prometheus.Query{Metric: "istio_requests_total", Labels: map[string]string{
"destination_app": egressName,
"source_workload_namespace": ns.Name(),
}})
// res is assigned on every attempt, including ones that return an error.
res = int(r)
return err
})
return res
}
13 changes: 6 additions & 7 deletions tests/integration/telemetry/outboundtrafficpolicy/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,10 @@ type TestCase struct {
// prometheus to validate that expected telemetry information was gathered;
// as well as the http response code
type Expected struct {
Metric string
PromQueryFormat string
StatusCode int
Protocol string
RequestHeaders map[string]string
Query prometheus.Query
StatusCode int
Protocol string
RequestHeaders map[string]string
}

// TrafficPolicy is the mode of the outbound traffic policy to use
Expand Down Expand Up @@ -279,8 +278,8 @@ func RunExternalRequest(cases []*TestCase, prometheus prometheus.Instance, mode
},
})

if tc.Expected.Metric != "" {
promtest.ValidateMetric(t, ctx.Clusters().Default(), prometheus, tc.Expected.PromQueryFormat, tc.Expected.Metric, 1)
if tc.Expected.Query.Metric != "" {
promtest.ValidateMetric(t, ctx.Clusters().Default(), prometheus, tc.Expected.Query, 1)
}
})
}
Expand Down
Loading

0 comments on commit f212b68

Please sign in to comment.