triton-inference-server · oandreeva-nv · Jul 5, 2024 · Feb 27, 2023 · Mar 8, 2023 · Mar 8, 2023
diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md
@@ -623,6 +623,22 @@ Then, you can specify headers in the `infer` method. For references, please
 look at our [tests](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py),
 e.g. [http context propagation test](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py#L494-L508).
 
+### Custom Backend Tracing
+
+In the case when a custom activity needs to be traced in the backend, please
+use `TRITONSERVER_InferenceTraceReportActivity` API. For examples, please
+refer to the [identity backend](https://github.com/triton-inference-server/identity_backend/blob/main/src/identity.cc).
+
+In `openTelemetry` trace mode, if one wishes to start a new span, make sure
+that the name of your custom activity ends with `_START`. To end the new span,
+make sure that corresponding activity ends with `_END`. For example, in the
+identity backend, we start a `CUSTOM_ACTIVITY` span, by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L872-L876)
+`CUSTOM_ACTIVITY_START` event; and we close this span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L880-L883)
+`CUSTOM_ACTIVITY_END` event.
+
+Please note, that it is user's responsibility to make sure that all custom started
+spans are properly ended.
+
 ### Limitations
 
 - OpenTelemetry trace mode is not supported on Windows systems.

diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py
@@ -115,12 +115,14 @@ def setUp(self):
         self.bls_model_name = "bls_simple"
         self.trace_context_model = "trace_context"
         self.non_decoupled_model_name_ = "repeat_int32"
+        self.identity_model = "custom_identity_int32"
         self.test_models = [
             self.simple_model_name,
             self.ensemble_model_name,
             self.bls_model_name,
             self.non_decoupled_model_name_,
             self.cancel_queue_model_name,
+            self.identity_model,
         ]
         self.root_span = "InferRequest"
         self._user_data = UserData()
@@ -219,6 +221,7 @@ def _check_events(self, span_name, events, is_cancelled):
             self.assertFalse(
                 all(entry in events for entry in root_events_http + root_events_grpc)
             )
+            self.assertEquals(len(events), len(compute_events))
 
         elif span_name == self.root_span:
             # Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD
@@ -230,23 +233,52 @@ def _check_events(self, span_name, events, is_cancelled):
             if "HTTP" in events:
                 self.assertTrue(all(entry in events for entry in root_events_http))
                 self.assertFalse(all(entry in events for entry in root_events_grpc))
+                self.assertEquals(len(events), len(root_events_http))
 
             elif "GRPC" in events:
                 self.assertTrue(all(entry in events for entry in root_events_grpc))
                 self.assertFalse(all(entry in events for entry in root_events_http))
+                self.assertEquals(len(events), len(root_events_grpc))
 
             if is_cancelled == False:
                 self.assertFalse(all(entry in events for entry in request_events))
                 self.assertFalse(all(entry in events for entry in compute_events))
 
         elif span_name in self.test_models:
+            if span_name == self.identity_model:
+                request_events.append("CUSTOM_SINGLE_ACTIVITY")
             # Check that all request related events (and only them)
             # are recorded in request span
             self.assertTrue(all(entry in events for entry in request_events))
             self.assertFalse(
                 all(entry in events for entry in root_events_http + root_events_grpc)
             )
             self.assertFalse(all(entry in events for entry in compute_events))
+            self.assertEquals(len(events), len(request_events))
+
+        elif span_name.startswith("CUSTOM_ACTIVITY"):
+            custom_activity_events = []
+            if len(span_name) > len("CUSTOM_ACTIVITY"):
+                custom_activity_events.append(str(span_name + "_START"))
+                custom_activity_events.append(str(span_name + "_END"))
+                # Check `custom_identity_int32` config file,
+                # parameter `single_activity_frequency` identifies
+                # which custom spans contain "CUSTOM_SINGLE_ACTIVITY" event
+                if int(span_name[-1]) % 3 == 0:
+                    custom_activity_events.append("CUSTOM_SINGLE_ACTIVITY")
+            else:
+                custom_activity_events = [
+                    "CUSTOM_ACTIVITY_START",
+                    "CUSTOM_ACTIVITY_END",
+                ]
+
+            self.assertTrue(
+                all(entry in events for entry in custom_activity_events),
+                "Span " + span_name,
+            )
+            self.assertEquals(
+                len(events), len(custom_activity_events), "Span " + span_name
+            )
 
     def _test_resource_attributes(self, attributes):
         """
@@ -487,6 +519,52 @@ def _test_simple_trace(self, headers=None):
             expected_parent_span_dict=expected_parent_span_dict,
         )
 
+    def _test_custom_identity_trace(self, headers=None):
+        """
+        Helper function, that specifies expected parameters to evaluate trace,
+        collected from running 1 inference request for `custom_identity_int32`
+        model.
+        Number of custom spans defined by the identity backend.
+        `CUSTOM_ACTIVITY` span will always be there,
+        `CUSTOM_ACTIVITY<N>` defined by `config.pbtxt parameters`.
+        """
+        expected_number_of_spans = 10
+        expected_counts = dict(
+            {
+                "compute": 1,
+                self.identity_model: 1,
+                self.root_span: 1,
+                "CUSTOM_ACTIVITY": 1,
+                "CUSTOM_ACTIVITY0": 1,
+                "CUSTOM_ACTIVITY1": 1,
+                "CUSTOM_ACTIVITY2": 1,
+                "CUSTOM_ACTIVITY3": 1,
+                "CUSTOM_ACTIVITY4": 1,
+                "CUSTOM_ACTIVITY5": 1,
+            }
+        )
+        expected_parent_span_dict = dict(
+            {
+                "InferRequest": ["custom_identity_int32"],
+                "custom_identity_int32": [
+                    "CUSTOM_ACTIVITY",
+                    "CUSTOM_ACTIVITY0",
+                    "compute",
+                ],
+                "CUSTOM_ACTIVITY0": ["CUSTOM_ACTIVITY1"],
+                "CUSTOM_ACTIVITY1": ["CUSTOM_ACTIVITY2"],
+                "CUSTOM_ACTIVITY2": ["CUSTOM_ACTIVITY3"],
+                "CUSTOM_ACTIVITY3": ["CUSTOM_ACTIVITY4"],
+                "CUSTOM_ACTIVITY4": ["CUSTOM_ACTIVITY5"],
+            }
+        )
+        self._test_trace(
+            headers=headers,
+            expected_number_of_spans=expected_number_of_spans,
+            expected_counts=expected_counts,
+            expected_parent_span_dict=expected_parent_span_dict,
+        )
+
     def _test_non_decoupled_trace(self, headers=None):
         """
         Helper function, that collects trace for non decoupled model and verifies it.
@@ -944,6 +1022,33 @@ def test_trace_context_exposed_to_pbe(self):
         context_pattern = re.compile(r"\d{2}-[0-9a-f]{32}-[0-9a-f]{16}-\d{2}")
         self.assertIsNotNone(re.match(context_pattern, context["traceparent"]))
 
+    def test_custom_backend_tracing(self):
+        """
+        Tests custom activities reported from identity backend.
+        """
+        input0_ = np.array([[4]], dtype=np.int32)
+        with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
+            inputs = []
+            inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
+            inputs[0].set_data_from_numpy(input0_)
+            client.infer(self.identity_model, inputs=inputs)
+        self._test_custom_identity_trace()
+
+    def test_custom_backend_tracing_context_propagation(self):
+        """
+        Tests custom activities reported from identity backend.
+        """
+        input0_ = np.array([[4]], dtype=np.int32)
+        with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
+            inputs = []
+            inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
+            inputs[0].set_data_from_numpy(input0_)
+            client.infer(
+                self.identity_model, inputs=inputs, headers=self.client_headers
+            )
+
+        self._test_custom_identity_trace(headers=self.client_headers)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh
@@ -97,6 +97,16 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
 cp -r ../L0_decoupled/models/repeat_int32 $MODELSDIR
 sed -i "s/decoupled: True/decoupled: False/" $MODELSDIR/repeat_int32/config.pbtxt
 
+# set up identity model
+mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
+    echo 'name: "custom_identity_int32"' >> config.pbtxt && \
+    echo 'backend: "identity"' >> config.pbtxt && \
+    echo 'max_batch_size: 1024' >> config.pbtxt && \
+    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }]' >> config.pbtxt)
+
 RET=0
 
 # Helpers =======================================
@@ -742,6 +752,60 @@ wait $SERVER_PID
 
 set +e
 
+# Custom backend tracing
+SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR
+            --load-model=custom_identity_int32 --trace-config=level=TIMESTAMPS \
+            --trace-config=triton,file=custom_tracing_triton.log \
+            --trace-config=rate=1 --trace-config=mode=triton"
+SERVER_LOG="./custom_backend_tracing.log"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+# Send 1 inference request, should expect 3 custom activities:
+# CUSTOM_SINGLE_ACTIVITY, CUSTOM_ACTIVITY_START, CUSTOM_ACTIVITY_END
+rm -f ./curl.out
+data='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,1],"data":[4]}]}'
+set +e
+code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/custom_identity_int32/infer -d ${data}`
+set -e
+if [ "$code" != "200" ]; then
+    cat ./curl.out
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+set +e
+
+
+$TRACE_SUMMARY -t custom_tracing_triton.log > summary_custom_tracing_triton.log
+
+if [ `grep -c "CUSTOM_SINGLE_ACTIVITY" summary_custom_tracing_triton.log` != "1" ]; then
+    cat summary_custom_tracing_triton.log
+    echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY" events.\n***"
+    RET=1
+fi
+
+if [ `grep -c "CUSTOM_ACTIVITY_START" summary_custom_tracing_triton.log` != "1" ]; then
+    cat summary_custom_tracing_triton.log
+    echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_START" events.\n***"
+    RET=1
+fi
+
+if [ `grep -c "CUSTOM_ACTIVITY_END" summary_custom_tracing_triton.log` != "1" ]; then
+    cat summary_custom_tracing_triton.log
+    echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_END" events.\n***"
+    RET=1
+fi
+
 # Check opentelemetry trace exporter sends proper info.
 # A helper python script starts listening on $OTLP_PORT, where
 # OTLP exporter sends traces.
@@ -758,7 +822,7 @@ rm collected_traces.json*
 # Unittests then check that produced spans have expected format and events
 OPENTELEMETRY_TEST=opentelemetry_unittest.py
 OPENTELEMETRY_LOG="opentelemetry_unittest.log"
-EXPECTED_NUM_TESTS="17"
+EXPECTED_NUM_TESTS="19"
 
 # Set up repo and args for SageMaker
 export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME="simple"
@@ -772,10 +836,20 @@ cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
 # Add model to test trace context exposed to python backend
 mkdir -p $MODELSDIR/trace_context/1 && cp ./trace_context.py $MODELSDIR/trace_context/1/model.py
 
+# set up identity model
+rm -r ${MODELSDIR}/custom_identity_int32
+mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
+    echo 'name: "custom_identity_int32"' >> config.pbtxt && \
+    echo 'backend: "identity"' >> config.pbtxt && \
+    echo 'max_batch_size: 1024' >> config.pbtxt && \
+    echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
+    echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
+    echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }, { key: "nested_span_count" \n value: { string_value: "6" } }, { key: "single_activity_frequency" \n value: { string_value: "3" } }]' >> config.pbtxt)
 
 SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
                 --load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
-                --load-model=repeat_int32 \
+                --load-model=repeat_int32 --load-model=custom_identity_int32\
                 --load-model=input_all_required \
                 --load-model=dynamic_batch \
                 --load-model=bls_simple --trace-config=level=TIMESTAMPS \
@@ -1164,5 +1238,4 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 set +e
-
 exit $RET