Restructure the GRPC client code #292

Merged 3 commits on Apr 24, 2023
Changes from 2 commits
2,261 changes: 10 additions & 2,251 deletions src/python/library/tritonclient/grpc/__init__.py

Large diffs are not rendered by default.

1,684 changes: 1,684 additions & 0 deletions src/python/library/tritonclient/grpc/_client.py

Large diffs are not rendered by default.

197 changes: 197 additions & 0 deletions src/python/library/tritonclient/grpc/_infer_input.py
@@ -0,0 +1,197 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tritonclient.grpc import service_pb2
from ._utils import raise_error
from tritonclient.utils import *
import numpy as np


class InferInput:
"""An object of InferInput class is used to describe
input tensor for an inference request.

Parameters
----------
name : str
The name of input whose data will be described by this object
shape : list
The shape of the associated input.
datatype : str
The datatype of the associated input.

"""

def __init__(self, name, shape, datatype):
self._input = service_pb2.ModelInferRequest().InferInputTensor()
self._input.name = name
self._input.ClearField('shape')
self._input.shape.extend(shape)
self._input.datatype = datatype
self._raw_content = None

def name(self):
"""Get the name of input associated with this object.

Returns
-------
str
The name of input
"""
return self._input.name

def datatype(self):
"""Get the datatype of input associated with this object.

Returns
-------
str
The datatype of input
"""
return self._input.datatype

def shape(self):
"""Get the shape of input associated with this object.

Returns
-------
list
The shape of input
"""
return self._input.shape

def set_shape(self, shape):
"""Set the shape of input.

Parameters
----------
shape : list
The shape of the associated input.
"""
self._input.ClearField('shape')
self._input.shape.extend(shape)

def set_data_from_numpy(self, input_tensor):
"""Set the tensor data from the specified numpy array for
input associated with this object.

Parameters
----------
input_tensor : numpy array
The tensor data in numpy array format

Raises
------
InferenceServerException
If failed to set data for the tensor.
"""
if not isinstance(input_tensor, (np.ndarray,)):
raise_error("input_tensor must be a numpy array")
# DLIS-3986: Special handling for bfloat16 until Numpy officially supports it
if self._input.datatype == "BF16":
if input_tensor.dtype != triton_to_np_dtype(self._input.datatype):
raise_error(
"got unexpected datatype {} from numpy array, expected {} for BF16 type"
.format(input_tensor.dtype,
triton_to_np_dtype(self._input.datatype)))
else:
dtype = np_to_triton_dtype(input_tensor.dtype)
if self._input.datatype != dtype:
raise_error(
"got unexpected datatype {} from numpy array, expected {}".
format(dtype, self._input.datatype))
valid_shape = True
if len(self._input.shape) != len(input_tensor.shape):
valid_shape = False
for i in range(len(self._input.shape)):
if self._input.shape[i] != input_tensor.shape[i]:
valid_shape = False
if not valid_shape:
raise_error(
"got unexpected numpy array shape [{}], expected [{}]".format(
str(input_tensor.shape)[1:-1],
str(self._input.shape)[1:-1]))

self._input.parameters.pop('shared_memory_region', None)
self._input.parameters.pop('shared_memory_byte_size', None)
self._input.parameters.pop('shared_memory_offset', None)

if self._input.datatype == "BYTES":
serialized_output = serialize_byte_tensor(input_tensor)
if serialized_output.size > 0:
self._raw_content = serialized_output.item()
else:
self._raw_content = b''
elif self._input.datatype == "BF16":
serialized_output = serialize_bf16_tensor(input_tensor)
if serialized_output.size > 0:
self._raw_content = serialized_output.item()
else:
self._raw_content = b''
else:
self._raw_content = input_tensor.tobytes()

def set_shared_memory(self, region_name, byte_size, offset=0):
"""Set the tensor data from the specified shared memory region.

Parameters
----------
region_name : str
The name of the shared memory region holding tensor data.
byte_size : int
The size of the shared memory region holding tensor data.
offset : int
The offset, in bytes, into the region where the data for
the tensor starts. The default value is 0.

"""
self._input.ClearField("contents")
self._raw_content = None

self._input.parameters[
'shared_memory_region'].string_param = region_name
self._input.parameters[
'shared_memory_byte_size'].int64_param = byte_size
if offset != 0:
self._input.parameters['shared_memory_offset'].int64_param = offset

def _get_tensor(self):
"""Retrieve the underlying InferInputTensor message.
Returns
-------
protobuf message
The underlying InferInputTensor protobuf message.
"""
return self._input

def _get_content(self):
"""Retrieve the contents for this tensor in raw bytes.
Returns
-------
bytes
The associated contents for this tensor in raw bytes.
"""
return self._raw_content
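
For context, a minimal usage sketch of the InferInput class defined above. The input name "INPUT0", the FP32 datatype, and the shared memory region name "input_region" are hypothetical placeholders for illustration, not names taken from this PR:

import numpy as np
import tritonclient.grpc as grpcclient

# Describe a 1x16 FP32 input; name, shape, and datatype must match the model config.
data = np.ones((1, 16), dtype=np.float32)
infer_input = grpcclient.InferInput("INPUT0", data.shape, "FP32")

# Option 1: copy the tensor bytes into the request body.
infer_input.set_data_from_numpy(data)  # raises InferenceServerException on dtype/shape mismatch

# Option 2: point the request at a pre-registered shared memory region instead
# (hypothetical region name; registration is done separately via the client's
# shared memory APIs).
# infer_input.set_shared_memory("input_region", byte_size=data.nbytes)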
153 changes: 153 additions & 0 deletions src/python/library/tritonclient/grpc/_infer_result.py
@@ -0,0 +1,153 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import rapidjson as json
from tritonclient.utils import *
from google.protobuf.json_format import MessageToJson


class InferResult:
"""An object of InferResult class holds the response of
an inference request and provide methods to retrieve
inference results.

Parameters
----------
result : protobuf message
The ModelInferResponse returned by the server
"""

def __init__(self, result):
self._result = result

def as_numpy(self, name):
"""Get the tensor data for output associated with this object
in numpy format

Parameters
----------
name : str
The name of the output tensor whose result is to be retrieved.

Returns
-------
numpy array
The numpy array containing the response data for the tensor or
None if the data for specified tensor name is not found.
"""
index = 0
for output in self._result.outputs:
if output.name == name:
shape = []
for value in output.shape:
shape.append(value)

datatype = output.datatype
if index < len(self._result.raw_output_contents):
if datatype == 'BYTES':
# String results contain a 4-byte string length
# followed by the actual string characters. Hence,
# need to decode the raw bytes to convert into
# array elements.
np_array = deserialize_bytes_tensor(
self._result.raw_output_contents[index])
elif datatype == "BF16":
np_array = deserialize_bf16_tensor(
self._result.raw_output_contents[index])
else:
np_array = np.frombuffer(
self._result.raw_output_contents[index],
dtype=triton_to_np_dtype(datatype))
elif len(output.contents.bytes_contents) != 0:
np_array = np.array(output.contents.bytes_contents,
copy=False)
else:
np_array = np.empty(0)
np_array = np_array.reshape(shape)
return np_array
else:
index += 1
return None

def get_output(self, name, as_json=False):
"""Retrieves the InferOutputTensor corresponding to the
named ouput.

Parameters
----------
name : str
The name of the tensor for which Output is to be
retrieved.
as_json : bool
If True then returns response as a json dict, otherwise
as a protobuf message. Default value is False.
The returned json is generated from the protobuf message
using MessageToJson and as a result int64 values are
represented as string. It is the caller's responsibility
to convert these strings back to int64 values as
necessary.

Returns
-------
protobuf message or dict
            If an InferOutputTensor with the specified name is present in
            ModelInferResponse then returns it as a protobuf message
            or dict, otherwise returns None.
"""
for output in self._result.outputs:
if output.name == name:
                if as_json:
                    return json.loads(
                        MessageToJson(output, preserving_proto_field_name=True))
else:
return output

return None

def get_response(self, as_json=False):
"""Retrieves the complete ModelInferResponse as a
json dict object or protobuf message

Parameters
----------
as_json : bool
If True then returns response as a json dict, otherwise
as a protobuf message. Default value is False.
The returned json is generated from the protobuf message
using MessageToJson and as a result int64 values are
represented as string. It is the caller's responsibility
to convert these strings back to int64 values as
necessary.

Returns
-------
protobuf message or dict
The underlying ModelInferResponse as a protobuf message or dict.
"""
if as_json:
return json.loads(
MessageToJson(self._result, preserving_proto_field_name=True))
else:
return self._result
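
For context, a minimal sketch of how the InferResult accessors above are typically used. The endpoint localhost:8001 and the names "simple" and "OUTPUT0" are hypothetical placeholders, and infer_input is the object from the earlier sketch:

import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
result = client.infer(model_name="simple", inputs=[infer_input])  # returns an InferResult

output_array = result.as_numpy("OUTPUT0")     # numpy array, or None if the name is not found
output_tensor = result.get_output("OUTPUT0")  # InferOutputTensor protobuf message (or None)
response = result.get_response(as_json=True)  # dict; int64 fields are rendered as strings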