Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat]:Custom Backend Tracing #7403

Merged
merged 15 commits into from
Jul 5, 2024
16 changes: 16 additions & 0 deletions docs/user_guide/trace.md
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,22 @@ Then, you can specify headers in the `infer` method. For references, please
look at our [tests](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py),
e.g. [http context propagation test](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py#L494-L508).

### Custom Backend Tracing

In the case when a custom activity needs to be traced in the backend, please
use `TRITONSERVER_InferenceTraceReportActivity` API. For examples, please
refer to the [identity backend](https://github.com/triton-inference-server/identity_backend/blob/main/src/identity.cc).

In `openTelemetry` trace mode, if one wishes to start a new span, make sure
that the name of your custom activity ends with `_START`. To end the new span,
make sure that corresponding activity ends with `_END`. For example, in the
identity backend, we start a `CUSTOM_ACTIVITY` span, by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L872-L876)
`CUSTOM_ACTIVITY_START` event; and we close this span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L880-L883)
`CUSTOM_ACTIVITY_END` event.

Please note, that it is user's responsibility to make sure that all custom started
spans are properly ended.

### Limitations

- OpenTelemetry trace mode is not supported on Windows systems.
Expand Down
105 changes: 105 additions & 0 deletions qa/L0_trace/opentelemetry_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,14 @@ def setUp(self):
self.bls_model_name = "bls_simple"
self.trace_context_model = "trace_context"
self.non_decoupled_model_name_ = "repeat_int32"
self.identity_model = "custom_identity_int32"
self.test_models = [
self.simple_model_name,
self.ensemble_model_name,
self.bls_model_name,
self.non_decoupled_model_name_,
self.cancel_queue_model_name,
self.identity_model,
]
self.root_span = "InferRequest"
self._user_data = UserData()
Expand Down Expand Up @@ -219,6 +221,7 @@ def _check_events(self, span_name, events, is_cancelled):
self.assertFalse(
all(entry in events for entry in root_events_http + root_events_grpc)
)
self.assertEquals(len(events), len(compute_events))

elif span_name == self.root_span:
# Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD
Expand All @@ -230,23 +233,52 @@ def _check_events(self, span_name, events, is_cancelled):
if "HTTP" in events:
self.assertTrue(all(entry in events for entry in root_events_http))
self.assertFalse(all(entry in events for entry in root_events_grpc))
self.assertEquals(len(events), len(root_events_http))

elif "GRPC" in events:
self.assertTrue(all(entry in events for entry in root_events_grpc))
self.assertFalse(all(entry in events for entry in root_events_http))
self.assertEquals(len(events), len(root_events_grpc))

if is_cancelled == False:
self.assertFalse(all(entry in events for entry in request_events))
self.assertFalse(all(entry in events for entry in compute_events))

elif span_name in self.test_models:
if span_name == self.identity_model:
request_events.append("CUSTOM_SINGLE_ACTIVITY")
# Check that all request related events (and only them)
# are recorded in request span
self.assertTrue(all(entry in events for entry in request_events))
self.assertFalse(
all(entry in events for entry in root_events_http + root_events_grpc)
)
self.assertFalse(all(entry in events for entry in compute_events))
self.assertEquals(len(events), len(request_events))

elif span_name.startswith("CUSTOM_ACTIVITY"):
custom_activity_events = []
if len(span_name) > len("CUSTOM_ACTIVITY"):
custom_activity_events.append(str(span_name + "_START"))
custom_activity_events.append(str(span_name + "_END"))
# Check `custom_identity_int32` config file,
# parameter `single_activity_frequency` identifies
# which custom spans contain "CUSTOM_SINGLE_ACTIVITY" event
if int(span_name[-1]) % 3 == 0:
custom_activity_events.append("CUSTOM_SINGLE_ACTIVITY")
else:
custom_activity_events = [
"CUSTOM_ACTIVITY_START",
"CUSTOM_ACTIVITY_END",
]

self.assertTrue(
all(entry in events for entry in custom_activity_events),
"Span " + span_name,
)
self.assertEquals(
len(events), len(custom_activity_events), "Span " + span_name
)

def _test_resource_attributes(self, attributes):
"""
Expand Down Expand Up @@ -487,6 +519,52 @@ def _test_simple_trace(self, headers=None):
expected_parent_span_dict=expected_parent_span_dict,
)

def _test_custom_identity_trace(self, headers=None):
"""
Helper function, that specifies expected parameters to evaluate trace,
collected from running 1 inference request for `custom_identity_int32`
model.
Number of custom spans defined by the identity backend.
`CUSTOM_ACTIVITY` span will always be there,
`CUSTOM_ACTIVITY<N>` defined by `config.pbtxt parameters`.
"""
expected_number_of_spans = 10
expected_counts = dict(
{
"compute": 1,
self.identity_model: 1,
self.root_span: 1,
"CUSTOM_ACTIVITY": 1,
"CUSTOM_ACTIVITY0": 1,
"CUSTOM_ACTIVITY1": 1,
"CUSTOM_ACTIVITY2": 1,
"CUSTOM_ACTIVITY3": 1,
"CUSTOM_ACTIVITY4": 1,
"CUSTOM_ACTIVITY5": 1,
}
)
expected_parent_span_dict = dict(
{
"InferRequest": ["custom_identity_int32"],
"custom_identity_int32": [
"CUSTOM_ACTIVITY",
"CUSTOM_ACTIVITY0",
"compute",
],
"CUSTOM_ACTIVITY0": ["CUSTOM_ACTIVITY1"],
"CUSTOM_ACTIVITY1": ["CUSTOM_ACTIVITY2"],
"CUSTOM_ACTIVITY2": ["CUSTOM_ACTIVITY3"],
"CUSTOM_ACTIVITY3": ["CUSTOM_ACTIVITY4"],
"CUSTOM_ACTIVITY4": ["CUSTOM_ACTIVITY5"],
}
)
self._test_trace(
headers=headers,
expected_number_of_spans=expected_number_of_spans,
expected_counts=expected_counts,
expected_parent_span_dict=expected_parent_span_dict,
)

def _test_non_decoupled_trace(self, headers=None):
"""
Helper function, that collects trace for non decoupled model and verifies it.
Expand Down Expand Up @@ -944,6 +1022,33 @@ def test_trace_context_exposed_to_pbe(self):
context_pattern = re.compile(r"\d{2}-[0-9a-f]{32}-[0-9a-f]{16}-\d{2}")
self.assertIsNotNone(re.match(context_pattern, context["traceparent"]))

def test_custom_backend_tracing(self):
"""
Tests custom activities reported from identity backend.
"""
input0_ = np.array([[4]], dtype=np.int32)
with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
inputs = []
inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
inputs[0].set_data_from_numpy(input0_)
client.infer(self.identity_model, inputs=inputs)
self._test_custom_identity_trace()

def test_custom_backend_tracing_context_propagation(self):
"""
Tests custom activities reported from identity backend.
"""
input0_ = np.array([[4]], dtype=np.int32)
with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client:
inputs = []
inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32"))
inputs[0].set_data_from_numpy(input0_)
client.infer(
self.identity_model, inputs=inputs, headers=self.client_headers
)

self._test_custom_identity_trace(headers=self.client_headers)


if __name__ == "__main__":
unittest.main()
79 changes: 76 additions & 3 deletions qa/L0_trace/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
cp -r ../L0_decoupled/models/repeat_int32 $MODELSDIR
sed -i "s/decoupled: True/decoupled: False/" $MODELSDIR/repeat_int32/config.pbtxt

# set up identity model
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
echo 'name: "custom_identity_int32"' >> config.pbtxt && \
echo 'backend: "identity"' >> config.pbtxt && \
echo 'max_batch_size: 1024' >> config.pbtxt && \
echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }]' >> config.pbtxt)

RET=0

# Helpers =======================================
Expand Down Expand Up @@ -742,6 +752,60 @@ wait $SERVER_PID

set +e

# Custom backend tracing
SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR
--load-model=custom_identity_int32 --trace-config=level=TIMESTAMPS \
--trace-config=triton,file=custom_tracing_triton.log \
--trace-config=rate=1 --trace-config=mode=triton"
SERVER_LOG="./custom_backend_tracing.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

# Send 1 inference request, should expect 3 custom activities:
# CUSTOM_SINGLE_ACTIVITY, CUSTOM_ACTIVITY_START, CUSTOM_ACTIVITY_END
rm -f ./curl.out
data='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,1],"data":[4]}]}'
set +e
code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/custom_identity_int32/infer -d ${data}`
set -e
if [ "$code" != "200" ]; then
cat ./curl.out
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e


$TRACE_SUMMARY -t custom_tracing_triton.log > summary_custom_tracing_triton.log

if [ `grep -c "CUSTOM_SINGLE_ACTIVITY" summary_custom_tracing_triton.log` != "1" ]; then
cat summary_custom_tracing_triton.log
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY" events.\n***"
RET=1
fi

if [ `grep -c "CUSTOM_ACTIVITY_START" summary_custom_tracing_triton.log` != "1" ]; then
cat summary_custom_tracing_triton.log
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_START" events.\n***"
RET=1
fi

if [ `grep -c "CUSTOM_ACTIVITY_END" summary_custom_tracing_triton.log` != "1" ]; then
cat summary_custom_tracing_triton.log
echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_END" events.\n***"
RET=1
fi

# Check opentelemetry trace exporter sends proper info.
# A helper python script starts listening on $OTLP_PORT, where
# OTLP exporter sends traces.
Expand All @@ -758,7 +822,7 @@ rm collected_traces.json*
# Unittests then check that produced spans have expected format and events
OPENTELEMETRY_TEST=opentelemetry_unittest.py
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
EXPECTED_NUM_TESTS="17"
EXPECTED_NUM_TESTS="19"

# Set up repo and args for SageMaker
export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME="simple"
Expand All @@ -772,10 +836,20 @@ cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \
# Add model to test trace context exposed to python backend
mkdir -p $MODELSDIR/trace_context/1 && cp ./trace_context.py $MODELSDIR/trace_context/1/model.py

# set up identity model
rm -r ${MODELSDIR}/custom_identity_int32
mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \
echo 'name: "custom_identity_int32"' >> config.pbtxt && \
echo 'backend: "identity"' >> config.pbtxt && \
echo 'max_batch_size: 1024' >> config.pbtxt && \
echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \
echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \
echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }, { key: "nested_span_count" \n value: { string_value: "6" } }, { key: "single_activity_frequency" \n value: { string_value: "3" } }]' >> config.pbtxt)

SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \
--load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \
--load-model=repeat_int32 \
--load-model=repeat_int32 --load-model=custom_identity_int32\
--load-model=input_all_required \
--load-model=dynamic_batch \
--load-model=bls_simple --trace-config=level=TIMESTAMPS \
Expand Down Expand Up @@ -1164,5 +1238,4 @@ set -e
kill $SERVER_PID
wait $SERVER_PID
set +e

exit $RET
Loading
Loading