From c9eb817e2e20409731bbef163005d274de93a848 Mon Sep 17 00:00:00 2001 From: Abhishek Ranjan Date: Thu, 9 Jan 2025 09:17:00 +0530 Subject: [PATCH] A72: add tracing event for name resolution delay --- clientconn.go | 5 +- stats/opentelemetry/client_metrics.go | 5 +- stats/opentelemetry/e2e_test.go | 164 +++++++++++++++++++++++--- 3 files changed, 157 insertions(+), 17 deletions(-) diff --git a/clientconn.go b/clientconn.go index c28627704be7..8aa4b4f6ca2b 100644 --- a/clientconn.go +++ b/clientconn.go @@ -680,7 +680,7 @@ func (cc *ClientConn) Connect() { // context expires. Returns nil unless the context expires first; otherwise // returns a status error based on the context. func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error { - // Set the start time for name resolution if it's not already set + // Set the start time for name resolution if it's not already set. cc.mu.Lock() if !cc.nameResolutionInProgress { cc.nameResolutionStartTime = time.Now() @@ -691,6 +691,9 @@ func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error { // This is on the RPC path, so we use a fast path to avoid the // more-expensive "select" below after the resolver has returned once. if cc.firstResolveEvent.HasFired() { + cc.mu.Lock() + cc.nameResolutionDelay = time.Now().Sub(cc.nameResolutionStartTime) + cc.mu.Unlock() return nil } select { diff --git a/stats/opentelemetry/client_metrics.go b/stats/opentelemetry/client_metrics.go index c6cc21057dfc..7dcd3768cf41 100644 --- a/stats/opentelemetry/client_metrics.go +++ b/stats/opentelemetry/client_metrics.go @@ -21,6 +21,7 @@ import ( "sync/atomic" "time" + "go.opentelemetry.io/otel/attribute" otelcodes "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" @@ -194,7 +195,9 @@ func (h *clientStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) if h.options.isTracingEnabled() { if info.NameResolutionDelay > 0 { callSpan := trace.SpanFromContext(ctx) - callSpan.AddEvent("Delayed name resolution complete") + callSpan.AddEvent("Delayed name resolution complete", trace.WithAttributes( + attribute.Int64("duration(in ns)", info.NameResolutionDelay.Nanoseconds()), + )) ctx = trace.ContextWithSpan(ctx, callSpan) } ctx, ai = h.traceTagRPC(ctx, info, ai) diff --git a/stats/opentelemetry/e2e_test.go b/stats/opentelemetry/e2e_test.go index 331b3474d7e5..6826e899bc26 100644 --- a/stats/opentelemetry/e2e_test.go +++ b/stats/opentelemetry/e2e_test.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "io" + "net" "testing" "time" @@ -46,6 +47,7 @@ import ( "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/sdk/trace/tracetest" "google.golang.org/grpc" + "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/encoding/gzip" experimental "google.golang.org/grpc/experimental/opentelemetry" @@ -58,6 +60,8 @@ import ( testgrpc "google.golang.org/grpc/interop/grpc_testing" testpb "google.golang.org/grpc/interop/grpc_testing" "google.golang.org/grpc/orca" + "google.golang.org/grpc/resolver" + "google.golang.org/grpc/resolver/manual" "google.golang.org/grpc/stats/opentelemetry" expstats "google.golang.org/grpc/stats/opentelemetry/experimental" "google.golang.org/grpc/stats/opentelemetry/internal/testutils" @@ -805,7 +809,16 @@ func (s) TestMetricsAndTracesOptionEnabled(t *testing.T) { name: "grpc.testing.TestService.UnaryCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "grpc.testing.TestService.FullDuplexCall", @@ -834,7 +847,16 @@ func (s) TestMetricsAndTracesOptionEnabled(t *testing.T) { name: "grpc.testing.TestService.FullDuplexCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "Attempt.grpc.testing.TestService.FullDuplexCall", @@ -915,9 +937,6 @@ func (s) TestMetricsAndTracesOptionEnabled(t *testing.T) { if got, want := att.Key, wantSI[index].events[eventIdx].Attributes[idx].Key; got != want { t.Errorf("Got attribute key for span name %q with event name %v, as %v, want %v", span.Name, event.Name, got, want) } - if got, want := att.Value, wantSI[index].events[eventIdx].Attributes[idx].Value; got != want { - t.Errorf("Got attribute value for span name %v with event name %v, as %v, want %v", span.Name, event.Name, got, want) - } } } } @@ -1090,7 +1109,16 @@ func (s) TestSpan(t *testing.T) { name: "grpc.testing.TestService.UnaryCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "grpc.testing.TestService.FullDuplexCall", @@ -1119,7 +1147,16 @@ func (s) TestSpan(t *testing.T) { name: "grpc.testing.TestService.FullDuplexCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "Attempt.grpc.testing.TestService.FullDuplexCall", @@ -1200,9 +1237,6 @@ func (s) TestSpan(t *testing.T) { if got, want := att.Key, wantSI[index].events[eventIdx].Attributes[idx].Key; got != want { t.Errorf("Got attribute key for span name %q with event name %v, as %v, want %v", span.Name, event.Name, got, want) } - if got, want := att.Value, wantSI[index].events[eventIdx].Attributes[idx].Value; got != want { - t.Errorf("Got attribute value for span name %v with event name %v, as %v, want %v", span.Name, event.Name, got, want) - } } } } @@ -1376,7 +1410,16 @@ func (s) TestSpan_WithW3CContextPropagator(t *testing.T) { name: "grpc.testing.TestService.UnaryCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "grpc.testing.TestService.FullDuplexCall", @@ -1405,7 +1448,16 @@ func (s) TestSpan_WithW3CContextPropagator(t *testing.T) { name: "grpc.testing.TestService.FullDuplexCall", spanKind: oteltrace.SpanKindClient.String(), attributes: []attribute.KeyValue{}, - events: []trace.Event{}, + events: []trace.Event{ + { + Name: "Delayed name resolution complete", + Attributes: []attribute.KeyValue{ + { + Key: "duration(in ns)", + }, + }, + }, + }, }, { name: "Attempt.grpc.testing.TestService.FullDuplexCall", @@ -1485,9 +1537,6 @@ func (s) TestSpan_WithW3CContextPropagator(t *testing.T) { if got, want := att.Key, wantSI[index].events[eventIdx].Attributes[idx].Key; got != want { t.Errorf("Got attribute key for span name %q with event name %v, as %v, want %v", span.Name, event.Name, got, want) } - if got, want := att.Value, wantSI[index].events[eventIdx].Attributes[idx].Value; got != want { - t.Errorf("Got attribute value for span name %v with event name %v, as %v, want %v", span.Name, event.Name, got, want) - } } } } @@ -1536,3 +1585,88 @@ func (s) TestMetricsAndTracesDisabled(t *testing.T) { t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) } } + +// TestEventForNameResolutionDelay verifies that an event is emitted for name +// resolution delay during RPC calls. +func (s) TestEventForNameResolutionDelay(t *testing.T) { + mo, _ := defaultMetricsOptions(t, nil) + // Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter + to, spanExporter := defaultTraceOptions(t) + // Set the W3CContextPropagator as part of TracingOptions. + to.TextMapPropagator = propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}) + otelOptions := opentelemetry.Options{} + if mo != nil { + otelOptions.MetricsOptions = *mo + } + if to != nil { + otelOptions.TraceOptions = *to + } + + lis1, err := net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("Error while listening. Err: %v", err) + } + s1 := grpc.NewServer(opentelemetry.ServerOption(otelOptions)) + defer s1.Stop() + + // Start the server with OpenTelemetry options + ts := &stubserver.StubServer{ + UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { + return &testpb.SimpleResponse{Payload: &testpb.Payload{ + Body: make([]byte, len(in.GetPayload().GetBody())), + }}, nil + }, + FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { + for { + _, err := stream.Recv() + if err == io.EOF { + return nil + } + } + }, + } + + testgrpc.RegisterTestServiceServer(s1, ts) + go s1.Serve(lis1) + + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + r := manual.NewBuilderWithScheme("whatever") + r.InitialState(resolver.State{Addresses: []resolver.Address{ + {Addr: lis1.Addr().String()}, + }}) + cc, err := grpc.DialContext(ctx, r.Scheme()+":///", grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()), opentelemetry.DialOption(otelOptions)) + if err != nil { + t.Fatalf("Error creating client: %v", err) + } + defer cc.Close() + + client := testgrpc.NewTestServiceClient(cc) + + t.Log("Waiting for the ClientConn to enter READY state.") + itestutils.AwaitState(ctx, t, cc, connectivity.Ready) + + t.Log("Creating unary RPC to server.") + _, err = client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ + Body: make([]byte, 10000), + }}) + if err != nil { + t.Fatalf("UnaryCall(_) = _, %v; want _, nil", err) + } + + // Get the spans from the exporter + spans := spanExporter.GetSpans() + if got, want := len(spans), 3; got != want { + t.Fatalf("Got %d spans, want %d", got, want) + } + + // Verify that events length for client should be 1, which will be + // of name resolution event. + if got, want := len(spans[2].Events), 1; got != want { + t.Fatalf("Got %d spans, want %d", got, want) + } + + if got, want := spans[2].Events[0].Name, "Delayed name resolution complete"; got != want { + t.Fatalf("Got %s spans, want %s", got, want) + } +}