Skip to content

Commit

Permalink
Use PyTorch WAR for unblocking TF1 CPU-only build (triton-inference-server#4140)
Browse files Browse the repository at this point in the history

* Fix CPU only build for pytorch backend on SBSA

* Use PyTorch WAR for unblocking TF1 CPU-only build
  • Loading branch information
Hemant Jain authored Apr 1, 2022
1 parent b30700f commit 4f1043a
Showing 1 changed file with 25 additions and 20 deletions.
45 changes: 25 additions & 20 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,10 +932,11 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
'''.format(argmap['TRITON_VERSION'], argmap['TRITON_CONTAINER_VERSION'],
argmap['BASE_IMAGE'])

# PyTorch backend needs extra CUDA and other dependencies during runtime
# that are missing in the CPU only base container. These dependencies
# must be copied from the Triton Min image
if not FLAGS.enable_gpu and ('pytorch' in backends):
# PyTorch and TensorFlow1 backend need extra CUDA and other dependencies
# during runtime that are missing in the CPU only base container. These
# dependencies must be copied from the Triton Min image
if not FLAGS.enable_gpu and \
(('pytorch' in backends) or ('tensorflow1' in backends)):
df += '''
############################################################################
## Triton Min image
Expand Down Expand Up @@ -1080,11 +1081,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
&& rm -f ${_CUDA_COMPAT_PATH}/lib
'''

elif 'pytorch' in backends:
# Add dependencies for pytorch backend. Note: Even though the build is
# cpu-only, the version of pytorch we are using depends upon libraries
# like cuda and cudnn. Since these dependencies are not present in ubuntu
# base image, we must copy these from the Triton min container ourselves.
elif ('pytorch' in backends) or ('tensorflow1' in backends):
cuda_arch = 'sbsa' if target_machine == 'aarch64' else 'x86_64'
libs_arch = 'aarch64' if target_machine == 'aarch64' else 'x86_64'
# Add extra dependencies for tensorflow1/pytorch backend.
# Note: Even though the build is cpu-only, the version of tensorflow1/
# pytorch we are using depend upon libraries like cuda and cudnn. Since
# these dependencies are not present in the ubuntu base image,
# we must copy these from the Triton min container ourselves.
df += '''
RUN mkdir -p /usr/local/cuda/lib64/stubs
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.11
Expand All @@ -1094,19 +1098,19 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu,
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.11
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11
RUN mkdir -p /usr/local/cuda/targets/x86_64-linux/lib
COPY --from=min_container /usr/local/cuda-11.6/targets/x86_64-linux/lib/libcudart.so.11.0 /usr/local/cuda/targets/x86_64-linux/lib/.
COPY --from=min_container /usr/local/cuda-11.6/targets/x86_64-linux/lib/libcupti.so.11.6 /usr/local/cuda/targets/x86_64-linux/lib/.
COPY --from=min_container /usr/local/cuda-11.6/targets/x86_64-linux/lib/libnvToolsExt.so.1 /usr/local/cuda/targets/x86_64-linux/lib/.
RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
COPY --from=min_container /usr/local/cuda-11.6/targets/{cuda_arch}-linux/lib/libcudart.so.11.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda-11.6/targets/{cuda_arch}-linux/lib/libcupti.so.11.6 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda-11.6/targets/{cuda_arch}-linux/lib/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/x86_64-linux-gnu/libnccl.so.2
COPY --from=min_container /usr/lib/x86_64-linux-gnu/libcudnn.so.8 /usr/lib/x86_64-linux-gnu/libcudnn.so.8
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2 /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.8
RUN apt-get update && \
apt-get install -y --no-install-recommends openmpi-bin
ENV LD_LIBRARY_PATH /usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH}
'''
ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
'''.format(cuda_arch=cuda_arch, libs_arch=libs_arch)

# Add dependencies needed for python backend
if 'python' in backends:
Expand Down Expand Up @@ -1255,9 +1259,10 @@ def container_build(images, backends, repoagents, endpoints):
}

# For cpu-only image we need to copy some cuda libraries and dependencies
# since we are using a PyTorch container that is not CPU-only
if not FLAGS.enable_gpu and ('pytorch' in backends) and \
(target_platform() != 'windows'):
# since we are using a PyTorch/TensorFlow1 container that is not CPU-only
if not FLAGS.enable_gpu and \
(('pytorch' in backends) or ('tensorflow1' in backends)) \
and (target_platform() != 'windows'):
dockerfileargmap[
'GPU_BASE_IMAGE'] = 'nvcr.io/nvidia/tritonserver:{}-py3-min'.format(
FLAGS.upstream_container_version)
Expand Down

0 comments on commit 4f1043a

Please sign in to comment.