DataDog · p-datadog · Nov 12, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024
@@ -73,7 +73,11 @@ def run_benchmark
     settings = Datadog.configuration
     # We benchmark untargeted and targeted trace points; untargeted ones
     # are prohibited by default, permit them.
-    settings.dynamic_instrumentation.untargeted_trace_points = true
+    begin
+      settings.dynamic_instrumentation.internal.untargeted_trace_points = true
+    rescue NoMethodError
+      settings.dynamic_instrumentation.untargeted_trace_points = true
+    end
     redactor = Datadog::DI::Redactor.new(settings)
     serializer = Datadog::DI::Serializer.new(settings, redactor)
     logger = Logger.new(STDERR)

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+# rubocop:disable Lint/AssignmentInCondition
+
 module Datadog
   module DI
     # Tracks loaded Ruby code by source file and maintains a map from
@@ -77,6 +79,23 @@ def start
                 registry[path] = tp.instruction_sequence
               end
             end
+          # Since this method normally is called from customer applications,
+          # rescue any exceptions that might not be handled to not break said
+          # customer applications.
+          rescue => exc
+            # TODO we do not have DI.component defined yet, remove steep:ignore
+            # before release.
+            if component = DI.component # steep:ignore
+              raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions
+              component.logger.warn("Unhandled exception in script_compiled trace point: #{exc.class}: #{exc}")
+              component.telemetry&.report(exc, description: "Unhandled exception in script_compiled trace point")
+              # TODO test this path
+            else
+              # If we don't have a component, we cannot log anything properly.
+              # Do not just print a warning to avoid spamming customer logs.
+              # Don't reraise the exception either.
+              # TODO test this path
+            end
           end
         end
       end
@@ -112,15 +131,18 @@ def active?
       def iseqs_for_path_suffix(suffix)
         registry_lock.synchronize do
           exact = registry[suffix]
-          return [exact] if exact
+          return [suffix, exact] if exact
 
           inexact = []
           registry.each do |path, iseq|
             if Utils.path_matches_suffix?(path, suffix)
-              inexact << iseq
+              inexact << [path, iseq]
             end
           end
-          inexact
+          if inexact.length > 1
+            raise Error::MultiplePathsMatch, "Multiple paths matched requested suffix"
+          end
+          inexact.first
         end
       end
 
@@ -164,3 +186,5 @@ def clear
     end
   end
 end
+
+# rubocop:enable Lint/AssignmentInCondition
@@ -12,8 +12,6 @@ def self.extended(base)
 
         def self.add_settings!(base)
           base.class_eval do
-            # The setting has "internal" prefix to prevent it from being
-            # prematurely turned on by customers.
             settings :dynamic_instrumentation do
               option :enabled do |o|
                 o.type :bool
@@ -26,48 +24,6 @@ def self.add_settings!(base)
                 o.default false
               end
 
-              # This option instructs dynamic instrumentation to use
-              # untargeted trace points when installing line probes and
-              # code tracking is not active.
-              # WARNING: untargeted trace points carry a massive performance
-              # penalty for the entire file in which a line probe is placed.
-              #
-              # If this option is set to false, which is the default,
-              # dynamic instrumentation will add probes that reference
-              # unknown files to the list of pending probes, and when
-              # the respective files are loaded, the line probes will be
-              # installed using targeted trace points. If the file in
-              # question is already loaded when the probe is received
-              # (for example, it is in a third-party library loaded during
-              # application boot), and code tracking was not active when
-              # the file was loaded, such files will not be instrumentable
-              # via line probes.
-              #
-              # If this option is set to true
-              #
-              # activated, DI will in
-              # activated or because the files being targeted have beenIf true and code tracking is not enabled, dynamic instrumentation
-              # will use untargeted trace points.
-              # If false and code tracking is not enabled, dynamic
-              # instrumentation will not instrument any files loaded
-              # WARNING: these trace points will greatly degrade performance
-              # of all code in the instrumented files.
-              option :untargeted_trace_points do |o|
-                o.type :bool
-                o.default false
-              end
-
-              # If true, all of the catch-all rescue blocks in DI
-              # will propagate the exceptions onward.
-              # WARNING: for internal Datadog use only - this will break
-              # the DI product and potentially the library in general in
-              # a multitude of ways, cause resource leakage, permanent
-              # performance decreases, etc.
-              option :propagate_all_exceptions do |o|
-                o.type :bool
-                o.default false
-              end
-
               # An array of variable and key names to redact in addition to
               # the built-in list of identifiers.
               #
@@ -154,6 +110,75 @@ def self.add_settings!(base)
                 o.type :int
                 o.default 20
               end
+
+              # Settings in the 'internal' group are for internal Datadog
+              # use only, and are needed to test dynamic instrumentation or
+              # experiment with features not released to customers.
+              settings :internal do
+                # This option instructs dynamic instrumentation to use
+                # untargeted trace points when installing line probes and
+                # code tracking is not active.
+                # WARNING: untargeted trace points carry a massive performance
+                # penalty for the entire file in which a line probe is placed.
+                #
+                # If this option is set to false, which is the default,
+                # dynamic instrumentation will add probes that reference
+                # unknown files to the list of pending probes, and when
+                # the respective files are loaded, the line probes will be
+                # installed using targeted trace points. If the file in
+                # question is already loaded when the probe is received
+                # (for example, it is in a third-party library loaded during
+                # application boot), and code tracking was not active when
+                # the file was loaded, such files will not be instrumentable
+                # via line probes.
+                #
+                # If this option is set to true, dynamic instrumentation will
+                # install untargeted trace points for all line probes,
+                # regardless of whether the referenced file is loaded.
+                # This permits instrumenting code which was loaded prior to
+                # code tracking being activated and instrumenting lines when
+                # code tracking is not activated at all. However, untargeted
+                # trace points are extremely slow and will greatly degrade
+                # performance of *all* code executed while they are installed,
+                # not just the instrumentation target.
+                option :untargeted_trace_points do |o|
+                  o.type :bool
+                  o.default false
+                end
+
+                # If true, all of the catch-all rescue blocks in DI
+                # will propagate the exceptions onward.
+                # WARNING: for internal Datadog use only - this will break
+                # the DI product and potentially the library in general in
+                # a multitude of ways, cause resource leakage, permanent
+                # performance decreases, etc.
+                option :propagate_all_exceptions do |o|
+                  o.type :bool
+                  o.default false
+                end
+
+                # Minimum interval, in seconds, between probe status and
+                # snapshot submissions to the agent. Probe notifier worker will
+                # batch together payloads submitted during each interval.
+                # A longer interval reduces the overhead imposed by dynamic
+                # instrumentation on the application, but also increases the
+                # time when application code cannot run (when the batches are
+                # being sent out by the probe notifier worker) and creates a
+                # possibility of dropping payloads if the queue gets too long.
+                option :min_send_interval do |o|
+                  o.type :int
+                  o.default 3
+                end
+
+                # Enable dynamic instrumentation in development environments.
+                # Currently DI does not fully implement support for code
+                # unloading and reloading, and is not supported in
+                # non-production environments.
+                option :development do |o|
+                  o.type :bool
+                  o.default false
+                end
+              end
             end
           end
         end

@@ -26,6 +26,23 @@ class AgentCommunicationError < Error
       # that does not in fact exist anywhere (e.g. due to a misspelling).
       class DITargetNotDefined < Error
       end
+
+      # Raised when trying to install a probe whose installation failed
+      # earlier in the same process. This exception should contain the
+      # original exception report from initial installation attempt.
+      class ProbePreviouslyFailed < Error
+      end
+
+      # Raised when installing a line probe and multiple files match the
+      # specified path suffix.
+      # A probe must be installed into one file only, since UI only
+      # supports one instrumented location for a probe.
+      # If multiple files match, UI cannot properly render the data from
+      # all of them, and arbitrarily choosing one file may be not what the
+      # user intended. Instrumentation will fail when multiple files match
+      # and the user will need to make their suffix more precise.
+      class MultiplePathsMatch < Error
+      end
     end
   end
 end
@@ -54,10 +54,11 @@ module DI
     #
     # @api private
     class Instrumenter
-      def initialize(settings, serializer, logger, code_tracker: nil)
+      def initialize(settings, serializer, logger, code_tracker: nil, telemetry: nil)
         @settings = settings
         @serializer = serializer
         @logger = logger
+        @telemetry = telemetry
         @code_tracker = code_tracker
 
         @lock = Mutex.new
@@ -66,6 +67,7 @@ def initialize(settings, serializer, logger, code_tracker: nil)
       attr_reader :settings
       attr_reader :serializer
       attr_reader :logger
+      attr_reader :telemetry
       attr_reader :code_tracker
 
       # This is a substitute for Thread::Backtrace::Location
@@ -172,12 +174,12 @@ def hook_line(probe, &block)
         # we use mock objects and the methods may be mocked with
         # individual invocations, yielding different return values on
         # different calls to the same method.
-        permit_untargeted_trace_points = settings.dynamic_instrumentation.untargeted_trace_points
+        permit_untargeted_trace_points = settings.dynamic_instrumentation.internal.untargeted_trace_points
 
         iseq = nil
         if code_tracker
-          iseq = code_tracker.iseqs_for_path_suffix(probe.file).first # steep:ignore
-          unless iseq
+          ret = code_tracker.iseqs_for_path_suffix(probe.file) # steep:ignore
+          unless ret
             if permit_untargeted_trace_points
               # Continue withoout targeting the trace point.
               # This is going to cause a serious performance penalty for
@@ -204,6 +206,10 @@ def hook_line(probe, &block)
           raise Error::DITargetNotDefined, "File not in code tracker registry: #{probe.file}"
         end
 
+        if ret
+          actual_path, iseq = ret
+        end
+
         # If trace point is not targeted, we only need one trace point per file.
         # Creating a trace point for each probe does work but the performance
         # penalty will be taken for each trace point defined in the file.
@@ -217,18 +223,26 @@ def hook_line(probe, &block)
         # this optimization just yet and create a trace point for each probe.
 
         tp = TracePoint.new(:line) do |tp|
-          # If trace point is not targeted, we must verify that the invocation
-          # is the file & line that we want, because untargeted trace points
-          # are invoked for *each* line of Ruby executed.
-          if iseq || tp.lineno == probe.line_no && probe.file_matches?(tp.path)
-            if rate_limiter.nil? || rate_limiter.allow?
-              # & is to stop steep complaints, block is always present here.
-              block&.call(probe: probe, trace_point: tp, caller_locations: caller_locations)
+          begin
+            # If trace point is not targeted, we must verify that the invocation
+            # is the file & line that we want, because untargeted trace points
+            # are invoked for *each* line of Ruby executed.
+            if iseq || tp.lineno == probe.line_no && probe.file_matches?(tp.path)
+              if rate_limiter.nil? || rate_limiter.allow?
+                # & is to stop steep complaints, block is always present here.
+                block&.call(probe: probe, trace_point: tp, caller_locations: caller_locations)
+              end
             end
+          rescue => exc
+            raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
+            logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
+            telemetry&.report(exc, description: "Unhandled exception in line trace point")
+            # TODO test this path
           end
         rescue => exc
           raise if settings.dynamic_instrumentation.propagate_all_exceptions
           logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
+          telemetry&.report(exc, description: "Unhandled exception in line trace point")
           # TODO test this path
         end
 
@@ -244,6 +258,8 @@ def hook_line(probe, &block)
           end
 
           probe.instrumentation_trace_point = tp
+          # actual_path could be nil if we don't use targeted trace points.
+          probe.instrumented_path = actual_path
 
           if iseq
             tp.enable(target: iseq, target_line: line_no)

@@ -71,6 +71,8 @@ def initialize(id:, type:,
 
         @rate_limit = rate_limit || (@capture_snapshot ? 1 : 5000)
         @rate_limiter = Datadog::Core::TokenBucket.new(@rate_limit)
+
+        @emitting_notified = false
       end
 
       attr_reader :id
@@ -157,6 +159,16 @@ def file_matches?(path)
       # Line trace point for line probes. Normally this would be a targeted
       # trace point.
       attr_accessor :instrumentation_trace_point
+
+      # Actual path to the file instrumented by the probe, for line probes,
+      # when code tracking is available and line trace point is targeted.
+      # For untargeted line trace points instrumented path will be nil.
+      attr_accessor :instrumented_path
+
+      attr_writer :emitting_notified
+      def emitting_notified?
+        !!@emitting_notified
+      end
     end
   end
 end