Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix compose.py for cpu only image with pytorch backend #4110

Merged
merged 4 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,10 +1037,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
ENV LD_LIBRARY_PATH /opt/tritonserver/backends/onnxruntime:${LD_LIBRARY_PATH}
'''

ort_dependencies = "libgomp1" if 'onnxruntime' in backends else ""
pytorch_dependencies = ""
backend_dependencies = ""
# libgomp1 is needed by both onnxruntime and pytorch backends
if ('onnxruntime' in backends) or ('pytorch' in backends):
backend_dependencies = "libgomp1"

# libgfortran5 is needed by pytorch backend on ARM
if ('pytorch' in backends) and (target_machine == 'aarch64'):
pytorch_dependencies = "libgfortran5"
backend_dependencies += " libgfortran5"

df += '''
ENV TF_ADJUST_HUE_FUSED 1
Expand Down Expand Up @@ -1075,11 +1079,9 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
dirmngr \
libnuma-dev \
curl \
{ort_dependencies} {pytorch_dependencies} && \
{backend_dependencies} && \
rm -rf /var/lib/apt/lists/*
'''.format(gpu_enabled=gpu_enabled,
ort_dependencies=ort_dependencies,
pytorch_dependencies=pytorch_dependencies)
'''.format(gpu_enabled=gpu_enabled, backend_dependencies=backend_dependencies)

if enable_gpu:
df += install_dcgm_libraries(argmap['DCGM_VERSION'], target_machine)
Expand All @@ -1094,7 +1096,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
elif 'pytorch' in backends:
# Add dependencies for pytorch backend. Note: Even though the build is
# cpu-only, the version of pytorch we are using depends upon libraries
# like cuda and cudnn. Since these dependencies are not present in ubuntu
# like cuda and cudnn. Since these dependencies are not present in ubuntu
# base image, we must copy these from the Triton min container ourselves.
df += '''
RUN mkdir -p /usr/local/cuda/lib64/stubs
Expand Down
28 changes: 23 additions & 5 deletions compose.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -66,10 +66,22 @@ def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}

FROM {} as full
FROM {}
FROM {} AS full
'''.format(argmap['TRITON_VERSION'], argmap['TRITON_CONTAINER_VERSION'],
images["full"], images["min"])
images["full"])

# PyTorch backend needs extra CUDA and other dependencies during runtime
# that are missing in the CPU only base container. These dependencies
# must be copied from the Triton Min image
if not FLAGS.enable_gpu and ('pytorch' in backends):
df += '''
FROM {} AS min_container

'''.format(images["gpu_min"])

df += '''
FROM {}
'''.format(images["min"])

import build
df += build.dockerfile_prepare_container_linux(argmap, backends,
Expand Down Expand Up @@ -374,7 +386,7 @@ def create_argmap(images):
images[parts[0]] = parts[1]
else:
get_container_version_if_not_specified()
if (FLAGS.enable_gpu):
if FLAGS.enable_gpu:
images = {
"full":
"nvcr.io/nvidia/tritonserver:{}-py3".format(
Expand All @@ -395,6 +407,12 @@ def create_argmap(images):
len(images) != 2,
"Need to both specify 'full' and 'min' images if at all")

# For cpu-only image we need to copy some cuda libraries and dependencies
# since we are using a PyTorch container that is not CPU-only
if 'pytorch' in FLAGS.backend:
images["gpu_min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
FLAGS.container_version)

argmap = create_argmap(images)

start_dockerfile(FLAGS.work_dir, images, argmap, dockerfile_name,
Expand Down