From 72d63e365a3ab53090a1bae1e03237a15d72f814 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Thu, 17 Mar 2022 21:47:57 +0100 Subject: [PATCH] Tracing: Migrate Google Cloud (Stackdriver) client to OpenTelemetry (#4838) * Add migration utilities - Add method to create bridge tracer - Implement a sampler which enables us to force tracing Signed-off-by: Matej Gera * Migrate and rename Stackdriver to Google Cloud - Rename to Google Cloud, keep it backwards compatible - Refactor and move to OTEL exporter - Adjust factory to use bridge tracer for this provider Signed-off-by: Matej Gera * Adjust HTTP middleware - to ensure force tracing / populate trace ID header works for the bridge tracer as well Signed-off-by: Matej Gera * Update documentation Signed-off-by: Matej Gera * Copyright file headers Signed-off-by: Matej Gera * Fix doc formatting Signed-off-by: Matej Gera * Upgrade OTEL to 1.3.0 Signed-off-by: Matej Gera * Wrap bridge tracer to make propagation work - Currently, bridge tracer supports only HTTP headers as a carrier. However, our instrumentation e.g. for gRPC uses metadata.MD as a carrier instead, breaking the propagation. This fix works around it by 'converting' the carrier to HTTP header. See code docs for details. 
Signed-off-by: Matej Gera * Remove leftover go.mod replace Signed-off-by: Matej Gera * Update version; fix constants naming Signed-off-by: Matej Gera --- docs/tracing.md | 10 +- examples/interactive/interactive_test.go | 2 +- go.mod | 12 +- go.sum | 47 ++++- pkg/tracing/client/factory.go | 27 +-- pkg/tracing/google_cloud/google_cloud.go | 85 +++++++++ pkg/tracing/google_cloud/google_cloud_test.go | 164 +++++++++++++++++ pkg/tracing/grpc.go | 2 + pkg/tracing/http.go | 19 +- pkg/tracing/migration/bridge.go | 129 ++++++++++++++ pkg/tracing/migration/sampler.go | 47 +++++ pkg/tracing/stackdriver/tracer_test.go | 165 ------------------ scripts/cfggen/main.go | 10 +- 13 files changed, 529 insertions(+), 190 deletions(-) create mode 100644 pkg/tracing/google_cloud/google_cloud.go create mode 100644 pkg/tracing/google_cloud/google_cloud_test.go create mode 100644 pkg/tracing/migration/bridge.go create mode 100644 pkg/tracing/migration/sampler.go delete mode 100644 pkg/tracing/stackdriver/tracer_test.go diff --git a/docs/tracing.md b/docs/tracing.md index 59a4b30b52..df8558d884 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -97,12 +97,16 @@ config: traceid_128bit: false ``` -### Stackdriver +### Google Cloud (formerly Stackdriver) Client for https://cloud.google.com/trace/ tracing. -```yaml mdox-exec="go run scripts/cfggen/main.go --name=stackdriver.Config" -type: STACKDRIVER +You will also need to ensure that authentication with the API is working; follow [this guide](https://cloud.google.com/trace/docs/setup/go-ot#configure_your_platform) to set it up. + +*Note:* The `type` in the configuration below can be either `GOOGLE_CLOUD` or `STACKDRIVER`; this ensures backwards compatibility. 
+ +```yaml mdox-exec="go run scripts/cfggen/main.go --name=google_cloud.Config" +type: GOOGLE_CLOUD config: service_name: "" project_id: "" diff --git a/examples/interactive/interactive_test.go b/examples/interactive/interactive_test.go index 182fc92290..a635384cf4 100644 --- a/examples/interactive/interactive_test.go +++ b/examples/interactive/interactive_test.go @@ -143,7 +143,7 @@ func TestReadOnlyThanosSetup(t *testing.T) { testutil.Ok(t, e2e.StartAndWaitReady(j)) jaegerConfig, err := yaml.Marshal(tracingclient.TracingConfig{ - Type: tracingclient.JAEGER, + Type: tracingclient.Jaeger, Config: jaeger.Config{ ServiceName: "thanos", SamplerType: "const", diff --git a/go.mod b/go.mod index d1c2b2400b..8d11fb391a 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/Azure/azure-storage-blob-go v0.13.0 github.com/Azure/go-autorest/autorest/adal v0.9.17 github.com/Azure/go-autorest/autorest/azure/auth v0.5.8 + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0 github.com/NYTimes/gziphandler v1.1.1 github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a github.com/alicebob/miniredis/v2 v2.14.3 @@ -74,6 +75,11 @@ require ( github.com/weaveworks/common v0.0.0-20210913144402-035033b78a78 go.elastic.co/apm v1.11.0 go.elastic.co/apm/module/apmot v1.11.0 + go.opentelemetry.io/contrib/propagators/ot v1.4.0 + go.opentelemetry.io/otel v1.5.0 + go.opentelemetry.io/otel/bridge/opentracing v1.5.0 + go.opentelemetry.io/otel/sdk v1.5.0 + go.opentelemetry.io/otel/trace v1.5.0 go.uber.org/atomic v1.9.0 go.uber.org/automaxprocs v1.4.0 go.uber.org/goleak v1.1.12 @@ -132,8 +138,10 @@ require ( github.com/elastic/go-windows v1.0.1 // indirect github.com/envoyproxy/go-control-plane v0.10.1 // indirect github.com/envoyproxy/protoc-gen-validate v0.6.2 // indirect - github.com/felixge/httpsnoop v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.2 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect + github.com/go-logr/logr v1.2.2 
// indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/analysis v0.20.0 // indirect github.com/go-openapi/errors v0.20.0 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect @@ -151,7 +159,7 @@ require ( github.com/gogo/googleapis v1.4.0 // indirect github.com/golang-jwt/jwt/v4 v4.0.0 // indirect github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.6 // indirect + github.com/google/go-cmp v0.5.7 // indirect github.com/google/go-querystring v1.0.0 // indirect github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0 // indirect github.com/google/uuid v1.2.0 // indirect diff --git a/go.sum b/go.sum index a27aef6d50..bb4f8b1440 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,7 @@ cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= +cloud.google.com/go v0.88.0/go.mod h1:dnKwfYbP9hQhefiUvpbcAyoGSHUrOxR20JVElLiUvEY= cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= cloud.google.com/go v0.92.2/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= @@ -123,6 +124,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0 h1:38fNtfhHY6bs22b/D6+hDzO6JR0rDzpGPD36dY2uPL4= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.0.0/go.mod h1:jE23wM1jvwSKgdGcoOkj5j9n1VWtncW36pL2bK1JU+0= github.com/HdrHistogram/hdrhistogram-go v1.1.0/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= @@ -585,8 +588,9 @@ github.com/fatih/structtag v1.1.0 h1:6j4mUV/ES2duvnAzKMFkN6/A5mCaNYPD3xfbAkLLOF8 github.com/fatih/structtag v1.1.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/felixge/fgprof v0.9.1 h1:E6FUJ2Mlv043ipLOCFqo8+cHo9MhQ203E2cdEK/isEs= github.com/felixge/fgprof v0.9.1/go.mod h1:7/HK6JFtFaARhIljgP2IV8rJLIoHDoOYoUphsnGvqxE= -github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/felixge/httpsnoop v1.0.2 h1:+nS9g82KMXccJ/wp0zyRW9ZBHFETmMGtkk+2CTTrW4o= +github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= @@ -632,6 +636,10 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v1.2.2 h1:ahHml/yUpnlb96Rp8HCvtYVPY8ZYpxq3g7UYchIYwbs= +github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= @@ -887,8 +895,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -916,6 +925,7 @@ github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof 
v0.0.0-20210715191844-86eeefc3e471/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0 h1:zHs+jv3LO743/zFGcByu2KmpbliCU2AhjcGgrdTwSG4= github.com/google/pprof v0.0.0-20211008130755-947d60d73cc0/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= @@ -938,6 +948,8 @@ github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3i github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5 h1:9fHAtK0uDfpveeqqo1hkEZJcFvYXAiCN3UutL8F9xHw= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= +github.com/googleinterns/cloud-operations-api-mock v0.0.0-20200709193332-a1e58c29bdd3 h1:eHv/jVY/JNop1xg2J9cBb4EzyMpWZoNCP1BslSAIkOI= +github.com/googleinterns/cloud-operations-api-mock v0.0.0-20200709193332-a1e58c29bdd3/go.mod h1:h/KNeRx7oYU4SpA4SoY7W2/NxDKEEVuwA6j9A27L4OI= github.com/gophercloud/gophercloud v0.6.0/go.mod h1:GICNByuaEBibcjmjvI7QvYJSZEbGkcYwAR7EZK2WMqM= github.com/gophercloud/gophercloud v0.12.0/go.mod h1:gmC5oQqMDOMO1t1gq5DquX/yAU808e/4mzjjDA76+Ss= github.com/gophercloud/gophercloud v0.13.0/go.mod h1:VX0Ibx85B60B5XOrZr6kaNwrmPUzcmMpwxvQ1WQIIWM= @@ -1818,7 +1830,32 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.24.0 h1:qW6j1kJU24yo2xIu16Py4m4AXn1dd+s2uKllGnTFAm0= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.24.0/go.mod h1:7W3JSDYTtH3qKKHrS1fMiwLtK7iZFLPq1+7htfspX/E= +go.opentelemetry.io/contrib/propagators/ot v1.4.0 
h1:sHp8P5+xmMORvsgKjIPPX4U97JUgSqY4xPWa6ncF1PA= +go.opentelemetry.io/contrib/propagators/ot v1.4.0/go.mod h1:FivzsGJqC7ND++UUOifWfkiuEOFXtVQ3fh2ZkqIJ9X4= go.opentelemetry.io/otel v0.11.0/go.mod h1:G8UCk+KooF2HLkgo8RHX9epABH/aRGYET7gQOqBVdB0= +go.opentelemetry.io/otel v1.0.0-RC3/go.mod h1:Ka5j3ua8tZs4Rkq4Ex3hwgBgOchyPVq5S6P2lz//nKQ= +go.opentelemetry.io/otel v1.0.0/go.mod h1:AjRVh9A5/5DE7S+mZtTR6t8vpKKryam+0lREnfmS4cg= +go.opentelemetry.io/otel v1.0.1/go.mod h1:OPEOD4jIT2SlZPMmwT6FqZz2C0ZNdQqiWcoK6M0SNFU= +go.opentelemetry.io/otel v1.4.0/go.mod h1:jeAqMFKy2uLIxCtKxoFj0FAL5zAPKQagc3+GtBWakzk= +go.opentelemetry.io/otel v1.5.0 h1:DhCU8oR2sJH9rfnwPdoV/+BJ7UIN5kXHL8DuSGrPU8E= +go.opentelemetry.io/otel v1.5.0/go.mod h1:Jm/m+rNp/z0eqJc74H7LPwQ3G87qkU/AnnAydAjSAHk= +go.opentelemetry.io/otel/bridge/opentracing v1.5.0 h1:fOaCCGOLhDRea4Hv+P+Z8d4JtPEf3SSuNJFT4diV5v8= +go.opentelemetry.io/otel/bridge/opentracing v1.5.0/go.mod h1:Pci42D1Wz/eZzWeKwGPgqu89bQeak3DdIeZhzGNFu8s= +go.opentelemetry.io/otel/internal/metric v0.23.0 h1:mPfzm9Iqhw7G2nDBmUAjFTfPqLZPbOW2k7QI57ITbaI= +go.opentelemetry.io/otel/internal/metric v0.23.0/go.mod h1:z+RPiDJe30YnCrOhFGivwBS+DU1JU/PiLKkk4re2DNY= +go.opentelemetry.io/otel/metric v0.23.0 h1:mYCcDxi60P4T27/0jchIDFa1WHEfQeU3zH9UEMpnj2c= +go.opentelemetry.io/otel/metric v0.23.0/go.mod h1:G/Nn9InyNnIv7J6YVkQfpc0JCfKBNJaERBGw08nqmVQ= +go.opentelemetry.io/otel/sdk v1.0.1/go.mod h1:HrdXne+BiwsOHYYkBE5ysIcv2bvdZstxzmCQhxTcZkI= +go.opentelemetry.io/otel/sdk v1.5.0 h1:QKhWBbcOC9fDCZKCfPFjWTWpfIlJR+i9xiUDYrLVmZs= +go.opentelemetry.io/otel/sdk v1.5.0/go.mod h1:CU4J1v+7iEljnm1G14QjdFWOXUyYLHVh0Lh+/BTYyFg= +go.opentelemetry.io/otel/trace v1.0.0-RC3/go.mod h1:VUt2TUYd8S2/ZRX09ZDFZQwn2RqfMB5MzO17jBojGxo= +go.opentelemetry.io/otel/trace v1.0.0/go.mod h1:PXTWqayeFUlJV1YDNhsJYB184+IvAH814St6o6ajzIs= +go.opentelemetry.io/otel/trace v1.0.1/go.mod h1:5g4i4fKLaX2BQpSBsxw8YYcgKpMMSW3x7ZTuYBr3sUk= +go.opentelemetry.io/otel/trace v1.4.0/go.mod 
h1:uc3eRsqDfWs9R7b92xbQbU42/eTNz4N+gLP8qJCi4aE= +go.opentelemetry.io/otel/trace v1.5.0 h1:AKQZ9zJsBRFAp7zLdyGNkqG2rToCDIt3i5tcLzQlbmU= +go.opentelemetry.io/otel/trace v1.5.0/go.mod h1:sq55kfhjXYr1zVSyexg0w1mpa03AYXR5eyTkB9NPPdE= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -2005,6 +2042,7 @@ golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210716203947-853a461950ff/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= @@ -2161,6 +2199,7 @@ golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20210503080704-8803ae5d1324/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -2273,6 +2312,7 @@ golang.org/x/tools v0.0.0-20200513201620-d5fe73897c97/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200701151220-7cb253f4c4f8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200721032237-77f530d86f9a/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= @@ -2388,6 +2428,7 @@ google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20200605102947-12044bf5ea91/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200710124503-20a17af7bd0e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200724131911-43cab4749ae7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= @@ -2417,6 
+2458,8 @@ google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxH google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= +google.golang.org/genproto v0.0.0-20210721163202-f1cecdd8b78a/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= +google.golang.org/genproto v0.0.0-20210722135532-667f2b7c528f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= diff --git a/pkg/tracing/client/factory.go b/pkg/tracing/client/factory.go index 3de07746be..455c3afc31 100644 --- a/pkg/tracing/client/factory.go +++ b/pkg/tracing/client/factory.go @@ -16,18 +16,20 @@ import ( "gopkg.in/yaml.v2" "github.com/thanos-io/thanos/pkg/tracing/elasticapm" + "github.com/thanos-io/thanos/pkg/tracing/google_cloud" "github.com/thanos-io/thanos/pkg/tracing/jaeger" "github.com/thanos-io/thanos/pkg/tracing/lightstep" - "github.com/thanos-io/thanos/pkg/tracing/stackdriver" + "github.com/thanos-io/thanos/pkg/tracing/migration" ) type TracingProvider string const ( - STACKDRIVER TracingProvider = "STACKDRIVER" - JAEGER TracingProvider = "JAEGER" - ELASTIC_APM TracingProvider = "ELASTIC_APM" - LIGHTSTEP TracingProvider = "LIGHTSTEP" + Stackdriver TracingProvider = "STACKDRIVER" + GoogleCloud TracingProvider = "GOOGLE_CLOUD" + Jaeger TracingProvider = "JAEGER" + ElasticAPM TracingProvider = "ELASTIC_APM" + Lightstep TracingProvider = 
"LIGHTSTEP" ) type TracingConfig struct { @@ -53,13 +55,18 @@ func NewTracer(ctx context.Context, logger log.Logger, metrics *prometheus.Regis } switch strings.ToUpper(string(tracingConf.Type)) { - case string(STACKDRIVER): - return stackdriver.NewTracer(ctx, logger, config) - case string(JAEGER): + case string(Stackdriver), string(GoogleCloud): + tracerProvider, err := google_cloud.NewTracerProvider(ctx, logger, config) + if err != nil { + return nil, nil, err + } + tracer, closerFunc := migration.Bridge(tracerProvider, logger) + return tracer, closerFunc, nil + case string(Jaeger): return jaeger.NewTracer(ctx, logger, metrics, config) - case string(ELASTIC_APM): + case string(ElasticAPM): return elasticapm.NewTracer(config) - case string(LIGHTSTEP): + case string(Lightstep): return lightstep.NewTracer(ctx, config) default: return nil, nil, errors.Errorf("tracing with type %s is not supported", tracingConf.Type) diff --git a/pkg/tracing/google_cloud/google_cloud.go b/pkg/tracing/google_cloud/google_cloud.go new file mode 100644 index 0000000000..29fb758a6c --- /dev/null +++ b/pkg/tracing/google_cloud/google_cloud.go @@ -0,0 +1,85 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package google_cloud + +import ( + "context" + "os" + + cloudtrace "github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/common/version" + "github.com/thanos-io/thanos/pkg/tracing/migration" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/resource" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "gopkg.in/yaml.v2" +) + +// Config - YAML configuration. +type Config struct { + ServiceName string `yaml:"service_name"` + ProjectId string `yaml:"project_id"` + SampleFactor uint64 `yaml:"sample_factor"` +} + +// NewTracerProvider create tracer provider from YAML. 
+func NewTracerProvider(ctx context.Context, logger log.Logger, conf []byte) (*tracesdk.TracerProvider, error) { + config := Config{} + if err := yaml.Unmarshal(conf, &config); err != nil { + return nil, err + } + + exporter, err := cloudtrace.New( + cloudtrace.WithContext(ctx), + cloudtrace.WithProjectID(config.ProjectId), + ) + if err != nil { + return nil, err + } + + return newTracerProvider(ctx, logger, tracesdk.NewBatchSpanProcessor(exporter), + config.SampleFactor, config.ServiceName), nil +} + +func newTracerProvider(ctx context.Context, logger log.Logger, processor tracesdk.SpanProcessor, sampleFactor uint64, serviceName string) *tracesdk.TracerProvider { + // Even if resource.New returns error, the resource will be valid - log the error and continue. + resource, err := resource.New(ctx, resource.WithAttributes(collectAttributes(serviceName)...)) + if err != nil { + level.Warn(logger).Log("msg", "detecting resources for tracing provider failed", "err", err) + } + + fraction := 1 / float64(sampleFactor) + if sampleFactor == 0 { + fraction = 0 + } + + tp := tracesdk.NewTracerProvider( + tracesdk.WithSpanProcessor(processor), + tracesdk.WithSampler( + migration.SamplerWithOverride( + tracesdk.ParentBased(tracesdk.TraceIDRatioBased(fraction)), + migration.ForceTracingAttributeKey, + ), + ), + tracesdk.WithResource(resource), + ) + + return tp +} + +func collectAttributes(serviceName string) []attribute.KeyValue { + attr := []attribute.KeyValue{ + semconv.ServiceNameKey.String(serviceName), + attribute.String("binary_revision", version.Revision), + } + + if len(os.Args) > 1 { + attr = append(attr, attribute.String("binary_cmd", os.Args[1])) + } + + return attr +} diff --git a/pkg/tracing/google_cloud/google_cloud_test.go b/pkg/tracing/google_cloud/google_cloud_test.go new file mode 100644 index 0000000000..69ab9ec59c --- /dev/null +++ b/pkg/tracing/google_cloud/google_cloud_test.go @@ -0,0 +1,164 @@ +// Copyright (c) The Thanos Authors. 
+// Licensed under the Apache License 2.0. + +// This file includes unit tests that test only tiny logic in this package, but are here mainly as a showcase on how tracing can +// be configured. + +package google_cloud + +import ( + "context" + "testing" + + "github.com/go-kit/log" + "github.com/opentracing/opentracing-go" + "github.com/thanos-io/thanos/pkg/testutil" + "github.com/thanos-io/thanos/pkg/tracing" + "github.com/thanos-io/thanos/pkg/tracing/migration" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} + +// This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server +// it will be still enabled for all spans within this span. +func TestContextTracing_ClientEnablesTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 1, // always sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a") + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvChild.Finish() + testutil.Equals(t, 1, len(exp.GetSpans())) + testutil.Equals(t, 1, countSampledSpans(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 2, len(exp.GetSpans())) + testutil.Equals(t, 2, countSampledSpans(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 3, len(exp.GetSpans())) + testutil.Equals(t, 3, countSampledSpans(exp.GetSpans())) +} + +// This test shows that if sample factor will disable tracing on client process, when it would be enabled on server +// it will be still disabled for all spans within this span. +func TestContextTracing_ClientDisablesTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a") + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + // Since we are not recording neither sampling, no spans should show up. + srvChild.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 0, len(exp.GetSpans())) +} + +// This test shows that if span will contain special baggage (for example from special HTTP header), even when sample +// factor will disable client & server tracing, it will be still enabled for all spans within this span. +func TestContextTracing_ForceTracing(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-client", + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + // Start the root span with the tag to force tracing. + clientRoot, clientCtx := tracing.StartSpan( + tracing.ContextWithTracer(context.Background(), tracer), + "a", + opentracing.Tag{Key: migration.ForceTracingAttributeKey, Value: "true"}, + ) + + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTracerProvider( + context.Background(), + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + 0, // never sample + "gcloud-test-server", + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + testutil.Equals(t, 0, len(exp.GetSpans())) + + srvChild.Finish() + testutil.Equals(t, 1, len(exp.GetSpans())) + testutil.Equals(t, 1, countSampledSpans(exp.GetSpans())) + + srvRoot.Finish() + testutil.Equals(t, 2, len(exp.GetSpans())) + testutil.Equals(t, 2, countSampledSpans(exp.GetSpans())) + + clientRoot.Finish() + testutil.Equals(t, 3, len(exp.GetSpans())) + testutil.Equals(t, 3, countSampledSpans(exp.GetSpans())) +} + +func countSampledSpans(ss tracetest.SpanStubs) int { + var count int + for _, s := range ss { + if s.SpanContext.IsSampled() { + count++ + } + } + + return count +} diff --git a/pkg/tracing/grpc.go b/pkg/tracing/grpc.go index 78b4391bf8..2f638ed235 100644 --- a/pkg/tracing/grpc.go +++ b/pkg/tracing/grpc.go @@ -6,6 +6,7 @@ package tracing import ( "context" + "github.com/davecgh/go-spew/spew" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/tracing" opentracing "github.com/opentracing/opentracing-go" @@ -37,6 +38,7 @@ func StreamServerInterceptor(tracer opentracing.Tracer) grpc.StreamServerInterce return func(srv interface{}, stream grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { // Add our own tracer. 
wrappedStream := grpc_middleware.WrapServerStream(stream) + spew.Println("wrapped ctx", stream.Context()) wrappedStream.WrappedContext = ContextWithTracer(stream.Context(), tracer) return interceptor(srv, wrappedStream, info, handler) diff --git a/pkg/tracing/http.go b/pkg/tracing/http.go index bb906eb54a..21b6663f4f 100644 --- a/pkg/tracing/http.go +++ b/pkg/tracing/http.go @@ -14,6 +14,7 @@ import ( "github.com/go-kit/log/level" "github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go/ext" + "github.com/thanos-io/thanos/pkg/tracing/migration" ) // HTTPMiddleware returns an HTTP handler that injects the given tracer and starts a new server span. @@ -22,7 +23,6 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n operationName := fmt.Sprintf("/%s HTTP[server]", name) return func(w http.ResponseWriter, r *http.Request) { - var span opentracing.Span wireContext, err := tracer.Extract( opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(r.Header), @@ -31,7 +31,17 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n level.Error(logger).Log("msg", "failed to extract tracer from request", "operationName", operationName, "err", err) } - span = tracer.StartSpan(operationName, ext.RPCServerOption(wireContext)) + opts := []opentracing.StartSpanOption{ext.RPCServerOption(wireContext)} + // Check for force tracing header and add it as a tag at the start of span. + // This is required for the OpenTelemetry sampler to force tracing. 
+ if r.Header.Get(ForceTracingBaggageKey) != "" { + opts = append(opts, opentracing.Tag{Key: migration.ForceTracingAttributeKey, Value: "true"}) + } + + span := tracer.StartSpan( + operationName, + opts..., + ) ext.HTTPMethod.Set(span, r.Method) ext.HTTPUrl.Set(span, r.URL.String()) @@ -42,6 +52,11 @@ func HTTPMiddleware(tracer opentracing.Tracer, name string, logger log.Logger, n if traceID, ok := t.GetTraceIDFromSpanContext(span.Context()); ok { w.Header().Set(traceIDResponseHeader, traceID) } + } else { + // Alternative to set trace ID header, if bridge tracer is being used. + if traceID, ok := migration.GetTraceIDFromBridgeSpan(span); ok { + w.Header().Set(traceIDResponseHeader, traceID) + } } next.ServeHTTP(w, r.WithContext(opentracing.ContextWithSpan(ContextWithTracer(r.Context(), tracer), span))) diff --git a/pkg/tracing/migration/bridge.go b/pkg/tracing/migration/bridge.go new file mode 100644 index 0000000000..4ace2e85c5 --- /dev/null +++ b/pkg/tracing/migration/bridge.go @@ -0,0 +1,129 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package migration + +import ( + "context" + "io" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/opentracing/opentracing-go" + ot_propagator "go.opentelemetry.io/contrib/propagators/ot" + "go.opentelemetry.io/otel" + bridge "go.opentelemetry.io/otel/bridge/opentracing" + "go.opentelemetry.io/otel/propagation" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +// Bridge is a method to facilitate migration from OpenTracing (OT) to +// OpenTelemetry (OTEL). It pairs an OTEL tracer with a so-called bridge +// tracer, which satisfies the OT Tracer interface. This makes it possible +// for OT instrumentation to work with an OTEL tracer. +// +// NOTE: After instrumentation migration is finished, this bridge should be +// removed. 
+func Bridge(tp *tracesdk.TracerProvider, l log.Logger) (opentracing.Tracer, io.Closer) { + compositePropagator := propagation.NewCompositeTextMapPropagator(ot_propagator.OT{}, propagation.TraceContext{}, propagation.Baggage{}) + otel.SetErrorHandler(otelErrHandler(func(err error) { + level.Error(l).Log("msg", "OpenTelemetry ErrorHandler", "err", err) + })) + otel.SetTextMapPropagator(compositePropagator) + otel.SetTracerProvider(tp) + + bridgeTracer, _ := bridge.NewTracerPair(tp.Tracer("")) + bridgeTracer.SetWarningHandler(func(warn string) { + level.Warn(l).Log("msg", "OpenTelemetry BridgeWarningHandler", "warn", warn) + }) + bridgeTracer.SetTextMapPropagator(propagation.TraceContext{}) + + tpShutdownFunc := func() error { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + return tp.Shutdown(ctx) + } + + return &bridgeTracerWrapper{bt: bridgeTracer}, shutdownAsCloser(tpShutdownFunc) +} + +func GetTraceIDFromBridgeSpan(span opentracing.Span) (string, bool) { + ctx := bridge.NewBridgeTracer().ContextWithSpanHook(context.Background(), span) + otelSpan := trace.SpanFromContext(ctx) + if otelSpan.SpanContext().IsSampled() && otelSpan.SpanContext().IsValid() { + return otelSpan.SpanContext().TraceID().String(), true + } + + return "", false +} + +type otelErrHandler func(err error) + +func (o otelErrHandler) Handle(err error) { + o(err) +} + +// Workaround to satisfy io.Closer interface. +type shutdownAsCloser func() error + +func (s shutdownAsCloser) Close() error { + return s() +} + +// This wrapper is necessary to enable proper trace propagation for gRPC +// calls between components. The bridge.BridgeTracer currently supports injection / +// extraction of only a single carrier type, which is opentracing.HTTPHeadersCarrier.
+// (see https://github.com/open-telemetry/opentelemetry-go/blob/main/bridge/opentracing/bridge.go#L626) +// +// To work around this, this wrapper extends Inject / Extract methods to "convert" +// other carrier types to opentracing.HTTPHeadersCarrier, in order to propagate +// data correctly. This is currently, at minimum, required for proper functioning +// of propagation in the gRPC middleware, which uses metadata.MD as a carrier. +// (see https://github.com/grpc-ecosystem/go-grpc-middleware/blob/v2.0.0-rc.2/interceptors/tracing/client.go#L95) +type bridgeTracerWrapper struct { + bt *bridge.BridgeTracer +} + +func (b *bridgeTracerWrapper) StartSpan(operationName string, opts ...opentracing.StartSpanOption) opentracing.Span { + return b.bt.StartSpan(operationName, opts...) +} + +func (b *bridgeTracerWrapper) Inject(sm opentracing.SpanContext, format interface{}, carrier interface{}) error { + otCarrier := opentracing.HTTPHeadersCarrier{} + err := b.bt.Inject(sm, format, otCarrier) + if err != nil { + return err + } + + if tmw, ok := carrier.(opentracing.TextMapWriter); ok { + err := otCarrier.ForeachKey(func(key, val string) error { + tmw.Set(key, val) + return nil + }) + if err != nil { + return err + } + } + + return b.bt.Inject(sm, format, carrier) +} + +func (b *bridgeTracerWrapper) Extract(format interface{}, carrier interface{}) (opentracing.SpanContext, error) { + if tmr, ok := carrier.(opentracing.TextMapReader); ok { + otCarrier := opentracing.HTTPHeadersCarrier{} + err := tmr.ForeachKey(func(key, val string) error { + otCarrier.Set(key, val) + return nil + }) + if err != nil { + return nil, err + } + + return b.bt.Extract(format, otCarrier) + } + + return b.bt.Extract(format, carrier) +} diff --git a/pkg/tracing/migration/sampler.go b/pkg/tracing/migration/sampler.go new file mode 100644 index 0000000000..10e75d5921 --- /dev/null +++ b/pkg/tracing/migration/sampler.go @@ -0,0 +1,47 @@ +// Copyright (c) The Thanos Authors. 
+// Licensed under the Apache License 2.0. + +package migration + +import ( + "fmt" + + "go.opentelemetry.io/otel/attribute" + tracesdk "go.opentelemetry.io/otel/sdk/trace" +) + +// ForceTracingAttributeKey is used to signal that a span should be traced. +const ForceTracingAttributeKey = "thanos.force_tracing" + +type samplerWithOverride struct { + baseSampler tracesdk.Sampler + overrideKey attribute.Key +} + +// SamplerWithOverride creates a new sampler with the capability to override +// the sampling decision, if the span includes an attribute with the specified key. +// Otherwise the sampler delegates the decision to the wrapped base sampler. This +// is primarily used to enable forced tracing in Thanos components. +// Implements go.opentelemetry.io/otel/sdk/trace.Sampler interface. +func SamplerWithOverride(baseSampler tracesdk.Sampler, overrideKey attribute.Key) tracesdk.Sampler { + return samplerWithOverride{ + baseSampler, + overrideKey, + } +} + +func (s samplerWithOverride) ShouldSample(p tracesdk.SamplingParameters) tracesdk.SamplingResult { + for _, attr := range p.Attributes { + if attr.Key == s.overrideKey { + return tracesdk.SamplingResult{ + Decision: tracesdk.RecordAndSample, + } + } + } + + return s.baseSampler.ShouldSample(p) +} + +func (s samplerWithOverride) Description() string { + return fmt.Sprintf("SamplerWithOverride{%s}", string(s.overrideKey)) +} diff --git a/pkg/tracing/stackdriver/tracer_test.go b/pkg/tracing/stackdriver/tracer_test.go deleted file mode 100644 index f869e090fd..0000000000 --- a/pkg/tracing/stackdriver/tracer_test.go +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) The Thanos Authors. -// Licensed under the Apache License 2.0. - -// This file includes unit tests that test only tiny logic in this package, but are here mainly as a showcase on how tracing can -// be configured.
- -package stackdriver - -import ( - "context" - "testing" - - "github.com/thanos-io/thanos/pkg/testutil" - "github.com/thanos-io/thanos/pkg/tracing" - - "github.com/opentracing/basictracer-go" -) - -func TestMain(m *testing.M) { - testutil.TolerantVerifyLeakMain(m) -} - -// This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server -// it will be still enabled for all spans within this span. -func TestContextTracing_ClientEnablesTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return true - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 1, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 2, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 3, len(m.GetSampledSpans())) -} - -// This test shows that if sample factor will disable tracing on client process, when it would be enabled on server -// it will be still disabled for all spans within this span. 
-func TestContextTracing_ClientDisablesTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return true - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 0, len(m.GetSampledSpans())) -} - -// This test shows that if span will contain special baggage (for example from special HTTP header), even when sample -// factor will disable client & server tracing, it will be still enabled for all spans within this span. 
-func TestContextTracing_ForceTracing(t *testing.T) { - m := &basictracer.InMemorySpanRecorder{} - r := &forceRecorder{wrapped: m} - - clientTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - - clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), clientTracer), "a") - - // Force tracing for this span and its children. - clientRoot.SetBaggageItem(tracing.ForceTracingBaggageKey, "Go for it") - - // Simulate Server process with different tracer, but with client span in context. - srvTracer := &tracer{ - serviceName: "Test", - wrapped: basictracer.NewWithOptions(basictracer.Options{ - ShouldSample: func(traceID uint64) bool { - return false - }, - Recorder: r, - MaxLogsPerSpan: 100, - }), - } - srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") - srvChild, _ := tracing.StartSpan(srvCtx, "bb") - testutil.Equals(t, 0, len(m.GetSpans())) - - srvChild.Finish() - testutil.Equals(t, 1, len(m.GetSpans())) - testutil.Equals(t, 1, len(m.GetSampledSpans())) - - srvRoot.Finish() - testutil.Equals(t, 2, len(m.GetSpans())) - testutil.Equals(t, 2, len(m.GetSampledSpans())) - - clientRoot.Finish() - testutil.Equals(t, 3, len(m.GetSpans())) - testutil.Equals(t, 3, len(m.GetSampledSpans())) -} diff --git a/scripts/cfggen/main.go b/scripts/cfggen/main.go index 265d01c611..9571f159f1 100644 --- a/scripts/cfggen/main.go +++ b/scripts/cfggen/main.go @@ -36,9 +36,9 @@ import ( storecache "github.com/thanos-io/thanos/pkg/store/cache" trclient "github.com/thanos-io/thanos/pkg/tracing/client" "github.com/thanos-io/thanos/pkg/tracing/elasticapm" + "github.com/thanos-io/thanos/pkg/tracing/google_cloud" "github.com/thanos-io/thanos/pkg/tracing/jaeger" "github.com/thanos-io/thanos/pkg/tracing/lightstep" - "github.com/thanos-io/thanos/pkg/tracing/stackdriver" ) 
var ( @@ -57,10 +57,10 @@ var ( } tracingConfigs = map[trclient.TracingProvider]interface{}{ - trclient.JAEGER: jaeger.Config{}, - trclient.STACKDRIVER: stackdriver.Config{}, - trclient.ELASTIC_APM: elasticapm.Config{}, - trclient.LIGHTSTEP: lightstep.Config{}, + trclient.Jaeger: jaeger.Config{}, + trclient.GoogleCloud: google_cloud.Config{}, + trclient.ElasticAPM: elasticapm.Config{}, + trclient.Lightstep: lightstep.Config{}, } indexCacheConfigs = map[storecache.IndexCacheProvider]interface{}{ storecache.INMEMORY: storecache.InMemoryIndexCacheConfig{},