Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for TensorRT v10 (multiple api calls have changed) #11166

Merged
merged 9 commits into from
May 22, 2024
48 changes: 37 additions & 11 deletions frigate/detectors/plugins/tensorrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import tensorrt as trt
from cuda import cuda
remz1337 marked this conversation as resolved.
Show resolved Hide resolved

TRT_VERSION = int(trt.__version__[0 : trt.__version__.find(".")])

TRT_SUPPORT = True
except ModuleNotFoundError:
TRT_SUPPORT = False
Expand Down Expand Up @@ -88,20 +90,46 @@ def _load_engine(self, model_path):
with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
return runtime.deserialize_cuda_engine(f.read())

def _binding_is_input(self, binding):
    """Return whether ``binding`` refers to an input tensor of the engine.

    Args:
        binding: The binding / tensor name obtained by iterating or
            indexing ``self.engine``.

    Returns:
        A truthy value when the tensor is an engine input, falsy otherwise.
    """
    if TRT_VERSION < 10:
        return self.engine.binding_is_input(binding)
    # TensorRT 10 removed the binding-based API. Ask the engine for the
    # tensor's I/O mode instead of comparing the tensor name against the
    # literal "input", which only works for models whose input tensor
    # happens to carry that exact name.
    return self.engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT

def _get_binding_dims(self, binding):
    """Return the shape the engine reports for the tensor ``binding``.

    The lookup goes through ``get_tensor_shape`` when ``TRT_VERSION`` is 10
    or newer and through ``get_binding_shape`` for earlier versions.
    """
    if TRT_VERSION >= 10:
        return self.engine.get_tensor_shape(binding)
    return self.engine.get_binding_shape(binding)

def _get_binding_dtype(self, binding):
    """Return the data type the engine reports for the tensor ``binding``.

    Versions below 10 use ``get_binding_dtype``; newer versions use
    ``get_tensor_dtype``.
    """
    # Pick the accessor matching the installed TensorRT major version, then
    # perform the single lookup.
    if TRT_VERSION < 10:
        dtype_lookup = self.engine.get_binding_dtype
    else:
        dtype_lookup = self.engine.get_tensor_dtype
    return dtype_lookup(binding)

def _execute(self):
    """Run the execution context on ``self.bindings`` and return its result.

    For ``TRT_VERSION`` 10 and newer the context's ``execute_v2`` is called
    with the bindings only; earlier versions call ``execute_async_v2`` and
    additionally pass ``self.stream`` as the stream handle.
    """
    if TRT_VERSION >= 10:
        return self.context.execute_v2(self.bindings)
    return self.context.execute_async_v2(
        bindings=self.bindings, stream_handle=self.stream
    )

def _get_input_shape(self):
"""Get input shape of the TensorRT YOLO engine."""
binding = self.engine[0]
assert self.engine.binding_is_input(binding)
binding_dims = self.engine.get_binding_shape(binding)
assert self._binding_is_input(binding)
binding_dims = self._get_binding_dims(binding)
if len(binding_dims) == 4:
return (
tuple(binding_dims[2:]),
trt.nptype(self.engine.get_binding_dtype(binding)),
trt.nptype(self._get_binding_dtype(binding)),
)
elif len(binding_dims) == 3:
return (
tuple(binding_dims[1:]),
trt.nptype(self.engine.get_binding_dtype(binding)),
trt.nptype(self._get_binding_dtype(binding)),
)
else:
raise ValueError(
Expand All @@ -115,7 +143,7 @@ def _allocate_buffers(self):
bindings = []
output_idx = 0
for binding in self.engine:
binding_dims = self.engine.get_binding_shape(binding)
binding_dims = self._get_binding_dims(binding)
if len(binding_dims) == 4:
# explicit batch case (TensorRT 7+)
size = trt.volume(binding_dims)
Expand All @@ -126,21 +154,21 @@ def _allocate_buffers(self):
raise ValueError(
"bad dims of binding %s: %s" % (binding, str(binding_dims))
)
nbytes = size * self.engine.get_binding_dtype(binding).itemsize
nbytes = size * self._get_binding_dtype(binding).itemsize
# Allocate host and device buffers
err, host_mem = cuda.cuMemHostAlloc(
nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP
)
assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}"
logger.debug(
f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_binding_dtype(binding)})"
f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self._get_binding_dtype(binding)})"
)
err, device_mem = cuda.cuMemAlloc(nbytes)
assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}"
# Append the device buffer to device bindings.
bindings.append(int(device_mem))
# Append to the appropriate list.
if self.engine.binding_is_input(binding):
if self._binding_is_input(binding):
logger.debug(f"Input has Shape {binding_dims}")
inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size))
else:
Expand Down Expand Up @@ -170,9 +198,7 @@ def _do_inference(self):
]

# Run inference.
if not self.context.execute_async_v2(
bindings=self.bindings, stream_handle=self.stream
):
if not self._execute():
logger.warn("Execute returned false")

# Transfer predictions back from the GPU.
Expand Down
Loading