Restructure the GRPC client code #292

Merged 3 commits on Apr 24, 2023
Changes from 2 commits
2,261 changes: 10 additions & 2,251 deletions src/python/library/tritonclient/grpc/__init__.py

Large diffs are not rendered by default.

1,684 changes: 1,684 additions & 0 deletions src/python/library/tritonclient/grpc/_client.py

Large diffs are not rendered by default.

197 changes: 197 additions & 0 deletions src/python/library/tritonclient/grpc/_infer_input.py
@@ -0,0 +1,197 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tritonclient.grpc import service_pb2
from ._utils import raise_error
from tritonclient.utils import *
import numpy as np


class InferInput:
"""An object of InferInput class is used to describe
input tensor for an inference request.

Parameters
----------
name : str
The name of input whose data will be described by this object
shape : list
The shape of the associated input.
datatype : str
The datatype of the associated input.

"""

def __init__(self, name, shape, datatype):
self._input = service_pb2.ModelInferRequest().InferInputTensor()
self._input.name = name
self._input.ClearField('shape')
self._input.shape.extend(shape)
self._input.datatype = datatype
self._raw_content = None

def name(self):
"""Get the name of input associated with this object.

Returns
-------
str
The name of input
"""
return self._input.name

def datatype(self):
"""Get the datatype of input associated with this object.

Returns
-------
str
The datatype of input
"""
return self._input.datatype

def shape(self):
"""Get the shape of input associated with this object.

Returns
-------
list
The shape of input
"""
return self._input.shape

def set_shape(self, shape):
"""Set the shape of input.

Parameters
----------
shape : list
The shape of the associated input.
"""
self._input.ClearField('shape')
self._input.shape.extend(shape)

def set_data_from_numpy(self, input_tensor):
"""Set the tensor data from the specified numpy array for
input associated with this object.

Parameters
----------
input_tensor : numpy array
The tensor data in numpy array format

Raises
------
InferenceServerException
If failed to set data for the tensor.
"""
if not isinstance(input_tensor, (np.ndarray,)):
raise_error("input_tensor must be a numpy array")
# DLIS-3986: Special handling for bfloat16 until Numpy officially supports it
if self._input.datatype == "BF16":
if input_tensor.dtype != triton_to_np_dtype(self._input.datatype):
raise_error(
"got unexpected datatype {} from numpy array, expected {} for BF16 type"
.format(input_tensor.dtype,
triton_to_np_dtype(self._input.datatype)))
else:
dtype = np_to_triton_dtype(input_tensor.dtype)
if self._input.datatype != dtype:
raise_error(
"got unexpected datatype {} from numpy array, expected {}".
format(dtype, self._input.datatype))
valid_shape = True
if len(self._input.shape) != len(input_tensor.shape):
valid_shape = False
for i in range(len(self._input.shape)):
if self._input.shape[i] != input_tensor.shape[i]:
valid_shape = False
if not valid_shape:
raise_error(
"got unexpected numpy array shape [{}], expected [{}]".format(
str(input_tensor.shape)[1:-1],
str(self._input.shape)[1:-1]))

self._input.parameters.pop('shared_memory_region', None)
self._input.parameters.pop('shared_memory_byte_size', None)
self._input.parameters.pop('shared_memory_offset', None)

if self._input.datatype == "BYTES":
serialized_output = serialize_byte_tensor(input_tensor)
if serialized_output.size > 0:
self._raw_content = serialized_output.item()
else:
self._raw_content = b''
elif self._input.datatype == "BF16":
serialized_output = serialize_bf16_tensor(input_tensor)
if serialized_output.size > 0:
self._raw_content = serialized_output.item()
else:
self._raw_content = b''
else:
self._raw_content = input_tensor.tobytes()

def set_shared_memory(self, region_name, byte_size, offset=0):
"""Set the tensor data from the specified shared memory region.

Parameters
----------
region_name : str
The name of the shared memory region holding tensor data.
byte_size : int
The size of the shared memory region holding tensor data.
offset : int
The offset, in bytes, into the region where the data for
the tensor starts. The default value is 0.

"""
self._input.ClearField("contents")
self._raw_content = None

self._input.parameters[
'shared_memory_region'].string_param = region_name
self._input.parameters[
'shared_memory_byte_size'].int64_param = byte_size
if offset != 0:
self._input.parameters['shared_memory_offset'].int64_param = offset

def _get_tensor(self):
"""Retrieve the underlying InferInputTensor message.
Returns
-------
protobuf message
The underlying InferInputTensor protobuf message.
"""
return self._input

def _get_content(self):
"""Retrieve the contents for this tensor in raw bytes.
Returns
-------
bytes
The associated contents for this tensor in raw bytes.
"""
return self._raw_content
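
For context, a minimal usage sketch of the InferInput class defined above. The input name "INPUT0", the FP32 datatype, and the shared memory region name "input_region" are hypothetical placeholders for illustration, not names taken from this PR:

import numpy as np
import tritonclient.grpc as grpcclient

# Describe a 1x16 FP32 input; name, shape, and datatype must match the model config.
data = np.ones((1, 16), dtype=np.float32)
infer_input = grpcclient.InferInput("INPUT0", data.shape, "FP32")

# Option 1: copy the tensor bytes into the request body.
infer_input.set_data_from_numpy(data)  # raises InferenceServerException on dtype/shape mismatch

# Option 2: point the request at a pre-registered shared memory region instead
# (hypothetical region name; registration is done separately via the client's
# shared memory APIs).
# infer_input.set_shared_memory("input_region", byte_size=data.nbytes)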
153 changes: 153 additions & 0 deletions src/python/library/tritonclient/grpc/_infer_result.py
@@ -0,0 +1,153 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
import rapidjson as json
from tritonclient.utils import *
from google.protobuf.json_format import MessageToJson


class InferResult:
"""An object of InferResult class holds the response of
an inference request and provide methods to retrieve
inference results.

Parameters
----------
result : protobuf message
The ModelInferResponse returned by the server
"""

def __init__(self, result):
self._result = result

def as_numpy(self, name):
"""Get the tensor data for output associated with this object
in numpy format

Parameters
----------
name : str
The name of the output tensor whose result is to be retrieved.

Returns
-------
numpy array
The numpy array containing the response data for the tensor or
None if the data for specified tensor name is not found.
"""
index = 0
for output in self._result.outputs:
if output.name == name:
shape = []
for value in output.shape:
shape.append(value)

datatype = output.datatype
if index < len(self._result.raw_output_contents):
if datatype == 'BYTES':
# String results contain a 4-byte string length
# followed by the actual string characters. Hence,
# need to decode the raw bytes to convert into
# array elements.
np_array = deserialize_bytes_tensor(
self._result.raw_output_contents[index])
elif datatype == "BF16":
np_array = deserialize_bf16_tensor(
self._result.raw_output_contents[index])
else:
np_array = np.frombuffer(
self._result.raw_output_contents[index],
dtype=triton_to_np_dtype(datatype))
elif len(output.contents.bytes_contents) != 0:
np_array = np.array(output.contents.bytes_contents,
copy=False)
else:
np_array = np.empty(0)
np_array = np_array.reshape(shape)
return np_array
else:
index += 1
return None

def get_output(self, name, as_json=False):
"""Retrieves the InferOutputTensor corresponding to the
named ouput.

Parameters
----------
name : str
The name of the tensor for which Output is to be
retrieved.
as_json : bool
If True then returns response as a json dict, otherwise
as a protobuf message. Default value is False.
The returned json is generated from the protobuf message
using MessageToJson and as a result int64 values are
represented as string. It is the caller's responsibility
to convert these strings back to int64 values as
necessary.

Returns
-------
protobuf message or dict
            If an InferOutputTensor with the specified name is present in
            ModelInferResponse then returns it as a protobuf message
            or dict, otherwise returns None.
"""
for output in self._result.outputs:
if output.name == name:
                if as_json:
                    return json.loads(
                        MessageToJson(output, preserving_proto_field_name=True))
else:
return output

return None

def get_response(self, as_json=False):
"""Retrieves the complete ModelInferResponse as a
json dict object or protobuf message

Parameters
----------
as_json : bool
If True then returns response as a json dict, otherwise
as a protobuf message. Default value is False.
The returned json is generated from the protobuf message
using MessageToJson and as a result int64 values are
represented as string. It is the caller's responsibility
to convert these strings back to int64 values as
necessary.

Returns
-------
protobuf message or dict
The underlying ModelInferResponse as a protobuf message or dict.
"""
if as_json:
return json.loads(
MessageToJson(self._result, preserving_proto_field_name=True))
else:
return self._result
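
For context, a minimal sketch of how the InferResult accessors above are typically used. The endpoint localhost:8001 and the names "simple" and "OUTPUT0" are hypothetical placeholders, and infer_input is the object from the earlier sketch:

import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="localhost:8001")
result = client.infer(model_name="simple", inputs=[infer_input])  # returns an InferResult

output_array = result.as_numpy("OUTPUT0")     # numpy array, or None if the name is not found
output_tensor = result.get_output("OUTPUT0")  # InferOutputTensor protobuf message (or None)
response = result.get_response(as_json=True)  # dict; int64 fields are rendered as strings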