diff --git a/.rubocop.yml b/.rubocop.yml index 6edf9386c17..a25d7daaf13 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -61,7 +61,7 @@ Style/NumericLiterals: Enabled: false Metrics/ClassLength: - Max: 140 + Enabled: false Metrics/BlockLength: Max: 42 diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md index 398a51b7078..8fd903f6767 100644 --- a/docs/GettingStarted.md +++ b/docs/GettingStarted.md @@ -1522,6 +1522,7 @@ run app | `headers` | Hash of HTTP request or response headers to add as tags to the `rack.request`. Accepts `request` and `response` keys with Array values e.g. `['Last-Modified']`. Adds `http.request.headers.*` and `http.response.headers.*` tags respectively. | `{ response: ['Content-Type', 'X-Request-ID'] }` | | `middleware_names` | Enable this if you want to use the last executed middleware class as the resource name for the `rack` span. If enabled alongside the `rails` instrumention, `rails` takes precedence by setting the `rack` resource name to the active `rails` controller when applicable. Requires `application` option to use. | `false` | | `quantize` | Hash containing options for quantization. May include `:query` or `:fragment`. | `{}` | +| `quantize.base` | Defines behavior for URL base (scheme, host, port). Removes URL base from `http.url` tag by default, leaving a path, and sets `http.base_url`. May be `:show` to keep URL base in `http.url` tag and not set `http.base_url` tag. Option must be nested inside the `quantize` option. | `nil` | | `quantize.query` | Hash containing options for query portion of URL quantization. May include `:show` or `:exclude`. See options below. Option must be nested inside the `quantize` option. | `{}` | | `quantize.query.show` | Defines which values should always be shown. Shows no values by default. May be an Array of strings, or `:all` to show all values. Option must be nested inside the `query` option. | `nil` | | `quantize.query.exclude` | Defines which values should be removed entirely. Excludes nothing by default. May be an Array of strings, or `:all` to remove the query string entirely. Option must be nested inside the `query` option. | `nil` | @@ -1529,32 +1530,42 @@ run app | `request_queuing` | Track HTTP request time spent in the queue of the frontend server. See [HTTP request queuing](#http-request-queuing) for setup details. Set to `true` to enable. | `false` | | `web_service_name` | Service name for frontend server request queuing spans. (e.g. `'nginx'`) | `'web-server'` | +Deprecation notice: `quantize.base` will change its default from `:exclude` to `:show` in a future version. Voluntarily moving to `:show` is recommended. + **Configuring URL quantization behavior** ```ruby Datadog.configure do |c| - # Default behavior: all values are quantized, fragment is removed. - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id&sort_by - # http://example.com/path?categories[]=1&categories[]=2 --> http://example.com/path?categories[] + # Default behavior: all values are quantized, base is removed, fragment is removed. + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id&sort_by + # http://example.com:8080/path?categories[]=1&categories[]=2 --> /path?categories[] + + # Remove URL base (scheme, host, port) + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id&sort_by#featured + c.tracing.instrument :rack, quantize: { base: :exclude } + + # Show URL base + # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id&sort_by#featured + c.tracing.instrument :rack, quantize: { base: :show } # Show values for any query string parameter matching 'category_id' exactly - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id=1&sort_by + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id=1&sort_by c.tracing.instrument :rack, quantize: { query: { show: ['category_id'] } } # Show all values for all query string parameters - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id=1&sort_by=asc + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id=1&sort_by=asc c.tracing.instrument :rack, quantize: { query: { show: :all } } # Totally exclude any query string parameter matching 'sort_by' exactly - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id c.tracing.instrument :rack, quantize: { query: { exclude: ['sort_by'] } } # Remove the query string entirely - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path c.tracing.instrument :rack, quantize: { query: { exclude: :all } } # Show URL fragments - # http://example.com/path?category_id=1&sort_by=asc#featured --> http://example.com/path?category_id&sort_by#featured + # http://example.com/path?category_id=1&sort_by=asc#featured --> /path?category_id&sort_by#featured c.tracing.instrument :rack, quantize: { fragment: :show } end ``` diff --git a/lib/datadog/appsec/configuration/settings.rb b/lib/datadog/appsec/configuration/settings.rb index 46eab9538f1..5b033f59a9e 100644 --- a/lib/datadog/appsec/configuration/settings.rb +++ b/lib/datadog/appsec/configuration/settings.rb @@ -5,7 +5,6 @@ module AppSec module Configuration # Configuration settings, acting as an integration registry # TODO: as with Configuration, this is a trivial implementation - # rubocop:disable Metrics/ClassLength class Settings class << self def boolean @@ -188,7 +187,6 @@ def reset! initialize end end - # rubocop:enable Metrics/ClassLength end end end diff --git a/lib/datadog/core/configuration/agent_settings_resolver.rb b/lib/datadog/core/configuration/agent_settings_resolver.rb index 06082121a67..b9ea7948f58 100644 --- a/lib/datadog/core/configuration/agent_settings_resolver.rb +++ b/lib/datadog/core/configuration/agent_settings_resolver.rb @@ -18,8 +18,6 @@ module Configuration # # Whenever there is a conflict (different configurations are provided in different orders), it MUST warn the users # about it and pick a value based on the following priority: code > environment variable > defaults. - # - # rubocop:disable Metrics/ClassLength class AgentSettingsResolver AgentSettings = \ Struct.new( @@ -359,7 +357,6 @@ def adapter(kind_or_custom_adapter, *args, **kwargs) end end end - # rubocop:enable Metrics/ClassLength end end end diff --git a/lib/datadog/core/configuration/components.rb b/lib/datadog/core/configuration/components.rb index 2431ed91114..028bc046b7a 100644 --- a/lib/datadog/core/configuration/components.rb +++ b/lib/datadog/core/configuration/components.rb @@ -18,7 +18,6 @@ module Datadog module Core module Configuration # Global components for the trace library. - # rubocop:disable Metrics/ClassLength class Components class << self def build_health_metrics(settings) @@ -431,7 +430,6 @@ def shutdown!(replacement = nil) telemetry.emit_closing! unless replacement end end - # rubocop:enable Metrics/ClassLength end end end diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 1f2b763bea5..695684c9b7c 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -15,7 +15,6 @@ module Configuration # Global configuration settings for the trace library. # @public_api # rubocop:disable Metrics/BlockLength - # rubocop:disable Metrics/ClassLength # rubocop:disable Layout/LineLength class Settings include Base @@ -713,9 +712,7 @@ def initialize(*_) end end end - # rubocop:enable Metrics/BlockLength - # rubocop:enable Metrics/ClassLength # rubocop:enable Layout/LineLength end end diff --git a/lib/datadog/profiling/collectors/old_stack.rb b/lib/datadog/profiling/collectors/old_stack.rb index 743b0947150..38b54e57fcc 100644 --- a/lib/datadog/profiling/collectors/old_stack.rb +++ b/lib/datadog/profiling/collectors/old_stack.rb @@ -15,7 +15,7 @@ module Collectors # Runs on its own background thread. # # This class has the prefix "Old" because it will be deprecated by the new native CPU Profiler - class OldStack < Core::Worker # rubocop:disable Metrics/ClassLength + class OldStack < Core::Worker include Core::Workers::Polling DEFAULT_MAX_TIME_USAGE_PCT = 2.0 diff --git a/lib/datadog/tracing/contrib/rack/middlewares.rb b/lib/datadog/tracing/contrib/rack/middlewares.rb index 9c4e2cf9c14..258843bade5 100644 --- a/lib/datadog/tracing/contrib/rack/middlewares.rb +++ b/lib/datadog/tracing/contrib/rack/middlewares.rb @@ -123,18 +123,6 @@ def call(env) # rubocop:disable Metrics/PerceivedComplexity # rubocop:disable Metrics/MethodLength def set_request_tags!(trace, request_span, env, status, headers, response, original_env) - # http://www.rubydoc.info/github/rack/rack/file/SPEC - # The source of truth in Rack is the PATH_INFO key that holds the - # URL for the current request; but some frameworks may override that - # value, especially during exception handling. - # - # Because of this, we prefer to use REQUEST_URI, if available, which is the - # relative path + query string, and doesn't mutate. - # - # REQUEST_URI is only available depending on what web server is running though. - # So when its not available, we want the original, unmutated PATH_INFO, which - # is just the relative path without query strings. - url = env['REQUEST_URI'] || original_env['PATH_INFO'] request_header_collection = Header::RequestHeaderCollection.new(env) request_headers_tags = parse_request_headers(request_header_collection) response_headers_tags = parse_response_headers(headers || {}) @@ -176,14 +164,35 @@ def set_request_tags!(trace, request_span, env, status, headers, response, origi request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_METHOD, env['REQUEST_METHOD']) end + url = parse_url(env, original_env) + if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_URL).nil? - options = configuration[:quantize] + options = configuration[:quantize] || {} + + # Quantization::HTTP.url base defaults to :show, but we are transitioning + options[:base] ||= :exclude + request_span.set_tag( Tracing::Metadata::Ext::HTTP::TAG_URL, Contrib::Utils::Quantization::HTTP.url(url, options) ) end + if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL).nil? + options = configuration[:quantize] + + unless options[:base] == :show + base_url = Contrib::Utils::Quantization::HTTP.base_url(url) + + unless base_url.empty? + request_span.set_tag( + Tracing::Metadata::Ext::HTTP::TAG_BASE_URL, + base_url + ) + end + end + end + if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_CLIENT_IP).nil? Tracing::ClientIp.set_client_ip_tag( request_span, @@ -192,19 +201,6 @@ def set_request_tags!(trace, request_span, env, status, headers, response, origi ) end - if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL).nil? - request_obj = ::Rack::Request.new(env) - - base_url = if request_obj.respond_to?(:base_url) - request_obj.base_url - else - # Compatibility for older Rack versions - request_obj.url.chomp(request_obj.fullpath) - end - - request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL, base_url) - end - if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE).nil? && status request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE, status) end @@ -238,6 +234,46 @@ def configuration Datadog.configuration.tracing[:rack] end + def parse_url(env, original_env) + request_obj = ::Rack::Request.new(env) + + # scheme, host, and port + base_url = if request_obj.respond_to?(:base_url) + request_obj.base_url + else + # Compatibility for older Rack versions + request_obj.url.chomp(request_obj.fullpath) + end + + # https://github.com/rack/rack/blob/main/SPEC.rdoc + # + # The source of truth in Rack is the PATH_INFO key that holds the + # URL for the current request; but some frameworks may override that + # value, especially during exception handling. + # + # Because of this, we prefer to use REQUEST_URI, if available, which is the + # relative path + query string, and doesn't mutate. + # + # REQUEST_URI is only available depending on what web server is running though. + # So when its not available, we want the original, unmutated PATH_INFO, which + # is just the relative path without query strings. + # + # SCRIPT_NAME is the first part of the request URL path, so that + # the application can know its virtual location. It should be + # prepended to PATH_INFO to reflect the correct user visible path. + request_uri = env['REQUEST_URI'].to_s + fullpath = if request_uri.empty? + query_string = original_env['QUERY_STRING'].to_s + path = original_env['SCRIPT_NAME'].to_s + original_env['PATH_INFO'].to_s + + query_string.empty? ? path : "#{path}?#{query_string}" + else + request_uri + end + + base_url + fullpath + end + def parse_user_agent_header(headers) headers.get(Tracing::Metadata::Ext::HTTP::HEADER_USER_AGENT) end diff --git a/lib/datadog/tracing/contrib/utils/quantization/http.rb b/lib/datadog/tracing/contrib/utils/quantization/http.rb index 30d6d8efc89..6f042df350f 100644 --- a/lib/datadog/tracing/contrib/utils/quantization/http.rb +++ b/lib/datadog/tracing/contrib/utils/quantization/http.rb @@ -22,6 +22,14 @@ def url(url, options = {}) options[:placeholder] || PLACEHOLDER end + def base_url(url, options = {}) + URI.parse(url).tap do |uri| + uri.path = '' + uri.query = nil + uri.fragment = nil + end.to_s + end + def url!(url, options = {}) options ||= {} @@ -32,8 +40,14 @@ def url!(url, options = {}) uri.query = (!query.nil? && query.empty? ? nil : query) end - # Remove any URI framents + # Remove any URI fragments uri.fragment = nil unless options[:fragment] == :show + + if options[:base] == :exclude + uri.host = nil + uri.port = nil + uri.scheme = nil + end end.to_s end diff --git a/lib/datadog/tracing/span_operation.rb b/lib/datadog/tracing/span_operation.rb index d49042d7ef4..0df9829cd35 100644 --- a/lib/datadog/tracing/span_operation.rb +++ b/lib/datadog/tracing/span_operation.rb @@ -18,7 +18,6 @@ module Tracing # It gives a Span a context which can be used to # build a Span. When completed, it yields the Span. # - # rubocop:disable Metrics/ClassLength # @public_api class SpanOperation include Metadata @@ -516,6 +515,5 @@ def duration_nano alias :span_type :type alias :span_type= :type= end - # rubocop:enable Metrics/ClassLength end end diff --git a/lib/datadog/tracing/trace_operation.rb b/lib/datadog/tracing/trace_operation.rb index 44246e0c33a..9188beee576 100644 --- a/lib/datadog/tracing/trace_operation.rb +++ b/lib/datadog/tracing/trace_operation.rb @@ -22,7 +22,6 @@ module Tracing # For async support, a {Datadog::Tracing::TraceOperation} should be employed # per execution context (e.g. Thread, etc.) # - # rubocop:disable Metrics/ClassLength # @public_api class TraceOperation include Metadata::Tagging @@ -454,6 +453,5 @@ def build_trace(spans, partial = false) ) end end - # rubocop:enable Metrics/ClassLength end end diff --git a/lib/datadog/tracing/trace_segment.rb b/lib/datadog/tracing/trace_segment.rb index aba260fff63..cd75f60e782 100644 --- a/lib/datadog/tracing/trace_segment.rb +++ b/lib/datadog/tracing/trace_segment.rb @@ -11,7 +11,6 @@ module Datadog module Tracing # Serializable construct representing a trace # @public_api - # rubocop:disable Metrics/ClassLength class TraceSegment TAG_NAME = 'name'.freeze TAG_RESOURCE = 'resource'.freeze @@ -203,6 +202,5 @@ def service_tag meta[TAG_SERVICE] end end - # rubocop:enable Metrics/ClassLength end end diff --git a/lib/datadog/tracing/tracer.rb b/lib/datadog/tracing/tracer.rb index f475a2eda91..b3b93d55fa3 100644 --- a/lib/datadog/tracing/tracer.rb +++ b/lib/datadog/tracing/tracer.rb @@ -23,7 +23,6 @@ module Tracing # example, a trace can be used to track the entire time spent processing a complicated web request. # Even though the request may require multiple resources and machines to handle the request, all # of these function calls and sub-requests would be encapsulated within a single trace. - # rubocop:disable Metrics/ClassLength class Tracer attr_reader \ :trace_flush, @@ -528,6 +527,5 @@ def skip_trace(name) end end end - # rubocop:enable Metrics/ClassLength end end diff --git a/spec/datadog/tracing/contrib/rack/integration_test_spec.rb b/spec/datadog/tracing/contrib/rack/integration_test_spec.rb index 490d18a497d..fbd7419936f 100644 --- a/spec/datadog/tracing/contrib/rack/integration_test_spec.rb +++ b/spec/datadog/tracing/contrib/rack/integration_test_spec.rb @@ -18,6 +18,8 @@ end end + after { Datadog.registry[:rack].reset_configuration! } + context 'for an application' do let(:app) do app_routes = routes @@ -101,10 +103,24 @@ it_behaves_like 'a rack GET 200 span' - it do - expect(span.get_tag('http.url')).to eq('/success/') - expect(span.get_tag('http.base_url')).to eq('http://example.org') - expect(span).to be_root_span + context 'and default quantization' do + let(:rack_options) { { quantize: {} } } + + it do + expect(span.get_tag('http.url')).to eq('/success/') + expect(span.get_tag('http.base_url')).to eq('http://example.org') + expect(span).to be_root_span + end + end + + context 'and quantization activated for URL base' do + let(:rack_options) { { quantize: { base: :show } } } + + it do + expect(span.get_tag('http.url')).to eq('http://example.org/success/') + expect(span.get_tag('http.base_url')).to be_nil + expect(span).to be_root_span + end end it { expect(trace.resource).to eq('GET 200') } @@ -113,14 +129,28 @@ context 'with query string parameters' do let(:route) { '/success?foo=bar' } - it_behaves_like 'a rack GET 200 span' + context 'and default quantization' do + let(:rack_options) { { quantize: {} } } - it do - # Since REQUEST_URI isn't available in Rack::Test by default (comes from WEBrick/Puma) - # it reverts to PATH_INFO, which doesn't have query string parameters. - expect(span.get_tag('http.url')).to eq('/success') - expect(span.get_tag('http.base_url')).to eq('http://example.org') - expect(span).to be_root_span + it_behaves_like 'a rack GET 200 span' + + it do + expect(span.get_tag('http.url')).to eq('/success?foo') + expect(span.get_tag('http.base_url')).to eq('http://example.org') + expect(span).to be_root_span + end + end + + context 'and quantization activated for the query' do + let(:rack_options) { { quantize: { query: { show: ['foo'] } } } } + + it_behaves_like 'a rack GET 200 span' + + it do + expect(span.get_tag('http.url')).to eq('/success?foo=bar') + expect(span.get_tag('http.base_url')).to eq('http://example.org') + expect(span).to be_root_span + end end end @@ -128,7 +158,7 @@ subject(:response) { get '/success?foo=bar', {}, 'REQUEST_URI' => '/success?foo=bar' } context 'and default quantization' do - let(:rack_options) { super().merge(quantize: {}) } + let(:rack_options) { { quantize: {} } } it_behaves_like 'a rack GET 200 span' @@ -143,7 +173,7 @@ end context 'and quantization activated for the query' do - let(:rack_options) { super().merge(quantize: { query: { show: ['foo'] } }) } + let(:rack_options) { { quantize: { query: { show: ['foo'] } } } } it_behaves_like 'a rack GET 200 span' diff --git a/spec/datadog/tracing/contrib/sinatra/multi_app_spec.rb b/spec/datadog/tracing/contrib/sinatra/multi_app_spec.rb index e6067ae822b..7189465c392 100644 --- a/spec/datadog/tracing/contrib/sinatra/multi_app_spec.rb +++ b/spec/datadog/tracing/contrib/sinatra/multi_app_spec.rb @@ -74,7 +74,7 @@ spans.each do |span| if span.name == Datadog::Tracing::Contrib::Rack::Ext::SPAN_REQUEST expect(span.resource).to eq('GET /endpoint') - expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/endpoint') + expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/one/endpoint') next end @@ -95,7 +95,7 @@ spans.each do |span| if span.name == Datadog::Tracing::Contrib::Rack::Ext::SPAN_REQUEST expect(span.resource).to eq('GET /endpoint') - expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/endpoint') + expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/two/endpoint') next end @@ -120,7 +120,7 @@ spans.each do |span| if span.name == Datadog::Tracing::Contrib::Rack::Ext::SPAN_REQUEST expect(span.resource).to eq('GET /one/endpoint') - expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/endpoint') + expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/one/endpoint') next end @@ -141,7 +141,7 @@ spans.each do |span| if span.name == Datadog::Tracing::Contrib::Rack::Ext::SPAN_REQUEST expect(span.resource).to eq('GET /two/endpoint') - expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/endpoint') + expect(span.get_tag(Datadog::Tracing::Metadata::Ext::HTTP::TAG_URL)).to eq('/two/endpoint') next end diff --git a/spec/datadog/tracing/contrib/utils/quantization/http_spec.rb b/spec/datadog/tracing/contrib/utils/quantization/http_spec.rb index 228c1f7db00..3ad41c4a124 100644 --- a/spec/datadog/tracing/contrib/utils/quantization/http_spec.rb +++ b/spec/datadog/tracing/contrib/utils/quantization/http_spec.rb @@ -47,12 +47,24 @@ it { is_expected.to eq('http://example.com/path') } end - context 'with show: :all' do + context 'with fragment: :show' do let(:options) { { fragment: :show } } it { is_expected.to eq('http://example.com/path?category_id&sort_by#featured') } end + context 'with base: :show' do + let(:options) { { base: :show } } + + it { is_expected.to eq('http://example.com/path?category_id&sort_by') } + end + + context 'with base: :exclude' do + let(:options) { { base: :exclude } } + + it { is_expected.to eq('/path?category_id&sort_by') } + end + context 'with Unicode characters' do # URLs do not permit unencoded non-ASCII characters in the URL. let(:url) { 'http://example.com/path?繋がってて' }