Adding the support tracing of child models invoked from a BLS model (#…

…6063) * Adding tests for bls * Added fixme, cleaned previous commit * Removed unused imports * Fixing commit tree: Refactor code, so that OTel tracer provider is initialized only once Added resource cmd option, testig Added docs * Clean up * Update docs/user_guide/trace.md Co-authored-by: Ryan McCormick <rmccormick@nvidia.com> * Revision * Update doc * Clean up * Added ostream exporter to OpenTelemetry for testing purposes; refactored trace tests * Added opentelemetry trace collector set up to tests; refactored otel exporter tests to use OTel collector instead of netcat * Revising according to comments * Added comment regarding 'parent_span_id' * Added permalink * Adjusted test --------- Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
triton-inference-server · Aug 7, 2023 · c3a2f6b · c3a2f6b
1 parent 36d80fe
commit c3a2f6b
Show file tree

Hide file tree

Showing 7 changed files with 532 additions and 153 deletions.
diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md
@@ -427,11 +427,38 @@ The meaning of the trace timestamps is:
 
   * BACKEND_OUTPUT: The tensor in the response of a backend.
 
+## Tracing for BLS models
+
+Triton does not collect traces for child models invoked from
+[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
+models by default.
+
+To include child models into collected traces, user needs to provide the `trace`
+argument (as shown in the example below), when constructing an InferenceRequest object.
+This helps Triton associate the child model with the parent model's trace (`request.trace()`).
+
+```python
+
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+  ...
+    def execute(self, requests):
+      ...
+      for request in requests:
+        ...
+        inference_request = pb_utils.InferenceRequest(
+            model_name='model_name',
+            requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
+            inputs=[<pb_utils.Tensor object>], trace = request.trace())
+
+```
+
 ## OpenTelemetry trace support
 
-Triton provides an option to generate and export traces
-for standalone and ensemble models
-using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
+Triton provides an option to generate and export traces using
+[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
 
 To specify OpenTelemetry mode for tracing, specify the `--trace-config`
 flag as follows:
@@ -477,16 +504,30 @@ The following table shows available OpenTelemetry trace APIs settings for
       trace data.
     </td>
     </tr>
+    <tr>
+    <td><code>resource</code></td>
+    <td><code>service.name=triton-inference-server</code></td>
+    <td>
+      Key-value pairs to be used as resource attributes. <br/>
+      Should be specified following the provided template:<br/>
+      <code>--trace-config opentelemetry,resource=<<text>key</text>>=<<text>value</text>></code><br/>
+      For example:<br/>
+      <code>--trace-config opentelemetry,resource=service.name=triton</code><br/>
+      <code>--trace-config opentelemetry,resource=service.version=1</code><br/>
+      Alternatively, key-vaue attributes can be specified through <br/>
+      <a href="https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_resource_attributes">
+      OTEL_RESOURCE_ATTRIBUTES</a>
+      environment variable.
+    </td>
+    </tr>
   </tbody>
 </table>
 
+
 ### Limitations
 
 - OpenTelemetry trace mode is not supported on Windows systems.
 
-- Tracing [BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
-models is not supported.
-
 - Triton supports only
 [OTLP/HTTP Exporter](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp)
 and allows specification of only url for this exporter through

diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py
@@ -28,39 +28,75 @@
 
 sys.path.append("../common")
 import json
-import time
+import re
 import unittest
 
 import numpy as np
 import test_util as tu
 import tritonclient.grpc as grpcclient
 import tritonclient.http as httpclient
 
-EXPECTED_NUM_SPANS = 10
+EXPECTED_NUM_SPANS = 16
+# OpenTelemetry OStream exporter sets `parent_span_id` to "0000000000000000",
+# if current span is a root span, i.e. there is no parent span.
+# https://github.com/open-telemetry/opentelemetry-cpp/blob/b7fd057185c4ed2dff507b859cbe058b7609fb4a/exporters/ostream/src/span_exporter.cc#L78C54-L78C68
+NO_PARENT_SPAN = "0000000000000000"
 
 
 class OpenTelemetryTest(tu.TestResultCollector):
     def setUp(self):
-        while True:
-            with open("trace_collector.log", "rt") as f:
-                data = f.read()
-                if data.count("resource_spans") != EXPECTED_NUM_SPANS:
-                    time.sleep(5)
-                    continue
-                else:
-                    break
-
-        data = data.split("\n")
-        full_spans = [
-            entry.split("POST")[0] for entry in data if "resource_spans" in entry
-        ]
-        self.spans = []
-        for span in full_spans:
-            span = json.loads(span)
-            self.spans.append(span["resource_spans"][0]["scope_spans"][0]["spans"][0])
+        # Extracted spans are in json-like format, thus data needs to be
+        # post-processed, so that `json` could accept it for further
+        # processing
+        with open("trace_collector.log", "rt") as f:
+            data = f.read()
+            # Removing new lines and tabs around `{`
+            json_string = re.sub("\n\t{\n\t", "{", data)
+            # `resources` field is a dictionary, so adding `{` and`}`
+            # in the next 2 transformations, `instr-lib` is a next field,
+            # so whatever goes before it, belongs to `resources`.
+            json_string = re.sub(
+                "resources     : \n\t", "resources     : {\n\t", json_string
+            )
+            json_string = re.sub(
+                "\n  instr-lib     :", "}\n  instr-lib     :", json_string
+            )
+            # `json`` expects "key":"value" format, some fields in the
+            # data have empty string as value, so need to add `"",`
+            json_string = re.sub(": \n\t", ':"",', json_string)
+            json_string = re.sub(": \n", ':"",', json_string)
+            # Extracted data missing `,' after each key-value pair,
+            # which `json` exppects
+            json_string = re.sub("\n|\n\t", ",", json_string)
+            # Removing tabs
+            json_string = re.sub("\t", "", json_string)
+            # `json` expects each key and value have `"`'s, so adding them to
+            # every word/number/alpha-numeric entry
+            json_string = re.sub(r"\b([\w.-]+)\b", r'"\1"', json_string)
+            # `span kind`` represents one key
+            json_string = re.sub('"span" "kind"', '"span kind"', json_string)
+            # Removing extra `,`
+            json_string = re.sub("{,", "{", json_string)
+            json_string = re.sub(",}", "}", json_string)
+            # Adding `,` between dictionary entries
+            json_string = re.sub("}{", "},{", json_string)
+            # `events` is a list of dictionaries, `json` will accept it in the
+            # form of "events" : [{....}, {.....}, ...]
+            json_string = re.sub(
+                '"events"        : {', '"events"        : [{', json_string
+            )
+            # Closing `events`' list of dictionaries
+            json_string = re.sub('},  "links"', '}],  "links"', json_string)
+            # Last 2 symbols are not needed
+            json_string = json_string[:-2]
+            # Since now `json_string` is a string, which represents dictionaries,
+            # we  put it into one dictionary, so that `json` could read it as one.
+            json_string = '{ "spans" :[' + json_string + "] }"
+            self.spans = json.loads(json_string)["spans"]
 
         self.simple_model_name = "simple"
         self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
+        self.bls_model_name = "bls_simple"
         self.root_span = "InferRequest"
 
     def _check_events(self, span_name, events):
@@ -121,32 +157,38 @@ def _check_parent(self, child_span, parent_span):
         # Check that child and parent span have the same trace_id
         # and child's `parent_span_id` is the same as parent's `span_id`
         self.assertEqual(child_span["trace_id"], parent_span["trace_id"])
-        self.assertIn(
-            "parent_span_id",
-            child_span,
+        self.assertNotEqual(
+            child_span["parent_span_id"],
+            NO_PARENT_SPAN,
             "child span does not have parent span id specified",
         )
-        self.assertEqual(child_span["parent_span_id"], parent_span["span_id"])
+        self.assertEqual(
+            child_span["parent_span_id"],
+            parent_span["span_id"],
+            "child {} , parent {}".format(child_span, parent_span),
+        )
 
     def test_spans(self):
         parsed_spans = []
 
         # Check that collected spans have proper events recorded
         for span in self.spans:
             span_name = span["name"]
-            self._check_events(span_name, json.dumps(span["events"]))
+            self._check_events(span_name, str(span["events"]))
             parsed_spans.append(span_name)
 
-        # There should be 6 spans in total:
-        # 3 for http request, 3 for grpc request, 4 for ensemble
-        self.assertEqual(len(self.spans), 10)
-        # We should have 3 compute spans
-        self.assertEqual(parsed_spans.count("compute"), 3)
-        # 4 request spans (3 named simple - same as our model name, 1 ensemble)
-        self.assertEqual(parsed_spans.count(self.simple_model_name), 3)
-        self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1)
-        # 3 root spans
-        self.assertEqual(parsed_spans.count(self.root_span), 3)
+        # There should be 16 spans in total:
+        # 3 for http request, 3 for grpc request, 4 for ensemble, 6 for bls
+        self.assertEqual(len(self.spans), EXPECTED_NUM_SPANS)
+        # We should have 5 compute spans
+        self.assertEqual(parsed_spans.count("compute"), 5)
+        # 7 request spans
+        # (4 named simple - same as our model name, 2 ensemble, 1 bls)
+        self.assertEqual(parsed_spans.count(self.simple_model_name), 4)
+        self.assertEqual(parsed_spans.count(self.ensemble_model_name), 2)
+        self.assertEqual(parsed_spans.count(self.bls_model_name), 1)
+        # 4 root spans
+        self.assertEqual(parsed_spans.count(self.root_span), 4)
 
     def test_nested_spans(self):
         # First 3 spans in `self.spans` belong to HTTP request
@@ -157,30 +199,33 @@ def test_nested_spans(self):
         for child, parent in zip(self.spans[:3], self.spans[1:3]):
             self._check_parent(child, parent)
 
-        # root_span should not have `parent_span_id` field
-        self.assertNotIn(
-            "parent_span_id", self.spans[2], "root span has a parent_span_id specified"
-        )
-
         # Next 3 spans in `self.spans` belong to GRPC request
         # Order of spans and their relationship described earlier
         for child, parent in zip(self.spans[3:6], self.spans[4:6]):
             self._check_parent(child, parent)
 
-        # root_span should not have `parent_span_id` field
-        self.assertNotIn(
-            "parent_span_id", self.spans[5], "root span has a parent_span_id specified"
-        )
-
-        # Final 4 spans in `self.spans` belong to ensemble request
+        # Next 4 spans in `self.spans` belong to ensemble request
         # Order of spans: compute span - request span - request span - root span
         for child, parent in zip(self.spans[6:10], self.spans[7:10]):
             self._check_parent(child, parent)
 
-        # root_span should not have `parent_span_id` field
-        self.assertNotIn(
-            "parent_span_id", self.spans[9], "root span has a parent_span_id specified"
-        )
+        # Final 6 spans in `self.spans` belong to bls with ensemble request
+        # Order of spans:
+        # compute span - request span (simple) - request span (ensemble)-
+        # - compute (for bls) - request (bls) - root span
+        # request span (ensemble) and compute (for bls) are children of
+        # request (bls)
+        children = self.spans[10:]
+        parents = (self.spans[11:13], self.spans[14], self.spans[14:])
+        for child, parent in zip(children, parents[0]):
+            self._check_parent(child, parent)
+
+    def test_resource_attributes(self):
+        for span in self.spans:
+            self.assertIn("test.key", span["resources"])
+            self.assertEqual("test.value", span["resources"]["test.key"])
+            self.assertIn("service.name", span["resources"])
+            self.assertEqual("test_triton", span["resources"]["service.name"])
 
 
 def prepare_data(client):
@@ -214,6 +259,16 @@ def prepare_traces():
     inputs = prepare_data(httpclient)
     triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs)
 
+    send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")
+
+
+def send_bls_request(model_name="simple"):
+    with httpclient.InferenceServerClient("localhost:8000") as client:
+        inputs = prepare_data(httpclient)
+        inputs.append(httpclient.InferInput("MODEL_NAME", [1], "BYTES"))
+        inputs[-1].set_data_from_numpy(np.array([model_name], dtype=np.object_))
+        client.infer("bls_simple", inputs)
+
 
 if __name__ == "__main__":
     unittest.main()