Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom container composition script #3039

Merged
merged 48 commits into from
Jul 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
41a2ee1
initial commit, able to specify backends
jbkyang-nvi Jun 16, 2021
5cf56a6
testing
jbkyang-nvi Jun 22, 2021
1f58666
testing
jbkyang-nvi Jun 22, 2021
5790f4f
another test
jbkyang-nvi Jun 23, 2021
d76bd6d
working version of unit test. Need to fix reliance on identity backend
jbkyang-nvi Jun 23, 2021
bdbce38
changed version for testing
jbkyang-nvi Jun 23, 2021
12c24f7
fixing some tests
jbkyang-nvi Jun 24, 2021
c1cdc3d
addressed comments
jbkyang-nvi Jun 24, 2021
e1461b4
fixing more things
jbkyang-nvi Jun 24, 2021
fb9fa2f
return infer test to original status
jbkyang-nvi Jun 24, 2021
21cd7b4
remove extra space
jbkyang-nvi Jun 24, 2021
4a4d49a
addressed comments
jbkyang-nvi Jun 25, 2021
ad0a15b
removed global variables for backend and repoagent
jbkyang-nvi Jun 25, 2021
a4734c9
addessed comments
jbkyang-nvi Jun 29, 2021
d04a225
addressed comments
jbkyang-nvi Jun 29, 2021
970264e
changed docs
jbkyang-nvi Jun 29, 2021
151924c
addressed comments
jbkyang-nvi Jun 30, 2021
f25aaf5
fix search pattern
jbkyang-nvi Jun 30, 2021
4a291ed
added dry run and updated docs
jbkyang-nvi Jun 30, 2021
e2b16d3
addressed comments
jbkyang-nvi Jun 30, 2021
d42f956
addressed comments
jbkyang-nvi Jul 1, 2021
080faea
addressed documentation comments
jbkyang-nvi Jul 2, 2021
b34a725
fix build.py
jbkyang-nvi Jul 8, 2021
048bfd1
add workdir before copy
jbkyang-nvi Jul 8, 2021
dad9be5
adjusted order so entrypoint is inserted correctly
jbkyang-nvi Jul 9, 2021
5382383
setting perms correctly
jbkyang-nvi Jul 9, 2021
60fbfb5
fix user permission issue
jbkyang-nvi Jul 9, 2021
2ac9e3d
addressed comments and removed extra instance of set path
jbkyang-nvi Jul 9, 2021
d2639f1
fix compose script
jbkyang-nvi Jul 9, 2021
f3cb329
fix issue with docker inspect
jbkyang-nvi Jul 11, 2021
a06784a
remove extra definition of
jbkyang-nvi Jul 12, 2021
3165e91
adding comments and error messages to find discrepency between local …
jbkyang-nvi Jul 12, 2021
5508fd9
added error messages
jbkyang-nvi Jul 12, 2021
dbea483
remove extra env in dockerfile
jbkyang-nvi Jul 12, 2021
da3b716
adjusted build so more similar to old build.py
jbkyang-nvi Jul 13, 2021
344fb50
removed log message
jbkyang-nvi Jul 14, 2021
adf7e9c
add images flag so users can specify the image
jbkyang-nvi Jul 16, 2021
32da9db
remove wrong variable name
jbkyang-nvi Jul 16, 2021
3144fba
addressed comments
jbkyang-nvi Jul 20, 2021
279f9d4
addressed comments
jbkyang-nvi Jul 20, 2021
8d72b6d
changed function name and add comments
jbkyang-nvi Jul 20, 2021
e1eced4
rebased and reformatted
jbkyang-nvi Jul 21, 2021
46b3be4
fix build.py problem
jbkyang-nvi Jul 21, 2021
12b78b9
fix again
jbkyang-nvi Jul 22, 2021
da3c92e
add default DCGM for older container versions
jbkyang-nvi Jul 22, 2021
f3a25d2
change default version to 2.2.8
jbkyang-nvi Jul 22, 2021
b0748af
updated documentation
jbkyang-nvi Jul 22, 2021
ad8b523
fixed documentation
jbkyang-nvi Jul 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 102 additions & 85 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@
# incorrectly load the other version of the openvino libraries.
#
TRITON_VERSION_MAP = {
'2.13.0dev':
('21.08dev', # triton container
'21.06', # upstream container
'1.8.0', # ORT
'2021.2.200', # ORT OpenVINO
'2021.2', # Standalone OpenVINO
'2.2.8') # DCGM version
'2.13.0dev': (
'21.08dev', # triton container
'21.06', # upstream container
'1.8.0', # ORT
'2021.2.200', # ORT OpenVINO
'2021.2', # Standalone OpenVINO
'2.2.8') # DCGM version
}

EXAMPLE_BACKENDS = ['identity', 'square', 'repeat']
Expand Down Expand Up @@ -474,13 +474,16 @@ def dali_cmake_args():
]


def install_dcgm_libraries():
dcgm_version = ''
if FLAGS.version not in TRITON_VERSION_MAP:
fail('unable to determine default repo-tag, DCGM version not known for {}'.format(FLAGS.version))
def install_dcgm_libraries(dcgm_version):
if dcgm_version == '':
fail(
'unable to determine default repo-tag, DCGM version not known for {}'
.format(FLAGS.version))
return ''
else:
dcgm_version = TRITON_VERSION_MAP[FLAGS.version][5]
return '''

return '''
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin \
Expand All @@ -489,7 +492,7 @@ def install_dcgm_libraries():
&& add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
RUN apt-get update \
&& apt-get install -y datacenter-gpu-manager=1:{}
'''.format(dcgm_version)
'''.format(dcgm_version, dcgm_version)


def fil_cmake_args(images):
Expand All @@ -503,7 +506,20 @@ def fil_cmake_args(images):
return cargs


def create_dockerfile_buildbase(ddir, dockerfile_name, argmap, backends):
def _default_version_field(version, index, description):
    # Return field `index` of the TRITON_VERSION_MAP entry for `version`.
    # Unknown releases are reported through fail() with the same
    # '<description> not known for <version>' wording the callers expect.
    if version not in TRITON_VERSION_MAP:
        fail('{} not known for {}'.format(description, version))
    return TRITON_VERSION_MAP[version][index]


def get_container_versions(version, container_version,
                           upstream_container_version):
    """Resolve the Triton and upstream container versions for a release.

    Values that are explicitly provided (anything other than None) are
    returned unchanged. A value left as None is filled in from
    TRITON_VERSION_MAP[version]: field 0 for the Triton container
    version and field 1 for the upstream container version.

    Args:
        version: Triton release string used as the TRITON_VERSION_MAP key.
            Only consulted when at least one of the other arguments is
            None.
        container_version: Explicit Triton container version, or None to
            use the default for `version`.
        upstream_container_version: Explicit upstream container version,
            or None to use the default for `version`.

    Returns:
        A (container_version, upstream_container_version) tuple.

    When a default is needed and `version` is not in TRITON_VERSION_MAP,
    fail() is called with a message naming the missing value.
    NOTE(review): fail() is defined elsewhere in this file and presumably
    aborts the build -- confirm.
    """
    # The container version is resolved first so that, when both values
    # are missing for an unknown release, the container-version message
    # is the one that gets reported.
    if container_version is None:
        container_version = _default_version_field(version, 0,
                                                   'container version')
    if upstream_container_version is None:
        upstream_container_version = _default_version_field(
            version, 1, 'upstream container version')
    return container_version, upstream_container_version


def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
df = '''
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
Expand Down Expand Up @@ -586,7 +602,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap, backends):
COPY . .
ENTRYPOINT []
'''
df += install_dcgm_libraries()
df += install_dcgm_libraries(argmap['DCGM_VERSION'])

df += '''
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
Expand All @@ -598,7 +614,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap, backends):
dfile.write(df)


def create_dockerfile_build(ddir, dockerfile_name, argmap, backends):
def create_dockerfile_build(ddir, dockerfile_name, backends):
df = '''
FROM tritonserver_builder_image AS build
FROM tritonserver_buildbase
Expand Down Expand Up @@ -641,19 +657,60 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
## Production stage: Create container with just inference server executable
############################################################################
FROM ${{BASE_IMAGE}}
'''.format(argmap['TRITON_VERSION'], argmap['TRITON_CONTAINER_VERSION'],
argmap['BASE_IMAGE'])

df += dockerfile_prepare_container_linux(argmap, backends)

df += '''
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 LICENSE .
COPY --chown=1000:1000 TRITON_VERSION .
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/bin/tritonserver bin/
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/lib/libtritonserver.so lib/
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/include/triton/core include/triton/core

# Top-level include/core not copied so --chown does not set it correctly,
# so explicit set on all of include
RUN chown -R triton-server:triton-server include
'''

for noncore in NONCORE_BACKENDS:
if noncore in backends:
df += '''
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/backends backends
'''
break

if len(repoagents) > 0:
df += '''
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/repoagents repoagents
'''
# Add feature labels for SageMaker endpoint
deadeyegoodwin marked this conversation as resolved.
Show resolved Hide resolved
if 'sagemaker' in endpoints:
df += '''
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
COPY --chown=1000:1000 --from=tritonserver_build /workspace/build/sagemaker/serve /usr/bin/.
'''
mkdir(ddir)
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
dfile.write(df)


def dockerfile_prepare_container_linux(argmap, backends):
# Common steps to produce docker images shared by build.py and compose.py.
# Sets environment variables, installs dependencies and adds entrypoint
df = '''
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION

ENV TRITON_SERVER_VERSION ${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION ${{TRITON_CONTAINER_VERSION}}
ENV TRITON_SERVER_VERSION ${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION ${{TRITON_CONTAINER_VERSION}}
LABEL com.nvidia.tritonserver.version="${{TRITON_SERVER_VERSION}}"
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
LABEL com.nvidia.tritonserver.version="${TRITON_SERVER_VERSION}"

ENV PATH /opt/tritonserver/bin:${{PATH}}
'''.format(argmap['TRITON_VERSION'], argmap['TRITON_CONTAINER_VERSION'],
argmap['BASE_IMAGE'])
ENV PATH /opt/tritonserver/bin:${PATH}
'''
df += '''
ENV TF_ADJUST_HUE_FUSED 1
ENV TF_ADJUST_SATURATION_FUSED 1
Expand Down Expand Up @@ -683,7 +740,7 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
libre2-5 && \
rm -rf /var/lib/apt/lists/*
'''
df += install_dcgm_libraries()
df += install_dcgm_libraries(argmap['DCGM_VERSION'])
# Add dependencies needed for python backend
if 'python' in backends:
df += '''
Expand All @@ -697,59 +754,26 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
pip3 install --upgrade numpy && \
rm -rf /var/lib/apt/lists/*
'''
df += '''
WORKDIR /opt/tritonserver
RUN rm -fr /opt/tritonserver/*
COPY --chown=1000:1000 LICENSE .
COPY --chown=1000:1000 TRITON_VERSION .
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/bin/tritonserver bin/
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/lib/libtritonserver.so lib/
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/include/triton/core include/triton/core

# Top-level include/core not copied so --chown does not set it correctly,
# so explicit set on all of include
RUN chown -R triton-server:triton-server include
'''

for noncore in NONCORE_BACKENDS:
if noncore in backends:
df += '''
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/backends backends
'''
break

if len(repoagents) > 0:
df += '''
COPY --chown=1000:1000 --from=tritonserver_build /tmp/tritonbuild/install/repoagents repoagents
'''

df += '''
# Extra defensive wiring for CUDA Compat lib
RUN ln -sf ${{_CUDA_COMPAT_PATH}}/lib.real ${{_CUDA_COMPAT_PATH}}/lib \
&& echo ${{_CUDA_COMPAT_PATH}}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \
RUN ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib \
&& echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \
&& ldconfig \
&& rm -f ${{_CUDA_COMPAT_PATH}}/lib
&& rm -f ${_CUDA_COMPAT_PATH}/lib
deadeyegoodwin marked this conversation as resolved.
Show resolved Hide resolved

COPY --chown=1000:1000 nvidia_entrypoint.sh /opt/tritonserver
WORKDIR /opt/tritonserver
RUN rm -fr /opt/tritonserver/*
COPY --chown=1000:1000 nvidia_entrypoint.sh .
ENTRYPOINT ["/opt/tritonserver/nvidia_entrypoint.sh"]

'''
df += '''
ENV NVIDIA_BUILD_ID {}
LABEL com.nvidia.build.id={}
LABEL com.nvidia.build.ref={}
'''.format(argmap['NVIDIA_BUILD_ID'], argmap['NVIDIA_BUILD_ID'],
argmap['NVIDIA_BUILD_REF'])

# Add feature labels for SageMaker endpoint
if 'sagemaker' in endpoints:
df += '''
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
COPY --chown=1000:1000 --from=tritonserver_build /workspace/build/sagemaker/serve /usr/bin/.
'''

mkdir(ddir)
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
dfile.write(df)
deadeyegoodwin marked this conversation as resolved.
Show resolved Hide resolved
return df


def create_dockerfile_windows(ddir, dockerfile_name, argmap, backends,
Expand Down Expand Up @@ -777,8 +801,6 @@ def create_dockerfile_windows(ddir, dockerfile_name, argmap, backends,
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION

ENV TRITON_SERVER_VERSION ${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION ${{TRITON_CONTAINER_VERSION}}
ENV TRITON_SERVER_VERSION ${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION ${{TRITON_CONTAINER_VERSION}}
LABEL com.nvidia.tritonserver.version="${{TRITON_SERVER_VERSION}}"
Expand Down Expand Up @@ -849,6 +871,9 @@ def container_build(images, backends, repoagents, endpoints):
FLAGS.container_version,
'BASE_IMAGE':
base_image,
'DCGM_VERSION':
'' if FLAGS.version is None or FLAGS.version
not in TRITON_VERSION_MAP else TRITON_VERSION_MAP[FLAGS.version][5],
}

cachefrommap = [
Expand All @@ -868,7 +893,7 @@ def container_build(images, backends, repoagents, endpoints):

log_verbose('buildbase container {}'.format(commonargs + cachefromargs))
create_dockerfile_buildbase(FLAGS.build_dir, 'Dockerfile.buildbase',
dockerfileargmap, backends)
dockerfileargmap)
try:
# Create buildbase image, this is an image with all
# dependencies needed for the build.
Expand Down Expand Up @@ -980,8 +1005,7 @@ def container_build(images, backends, repoagents, endpoints):
container.commit('tritonserver_builder_image', 'latest')
container.remove(force=True)

create_dockerfile_build(FLAGS.build_dir, 'Dockerfile.build',
dockerfileargmap, backends)
create_dockerfile_build(FLAGS.build_dir, 'Dockerfile.build', backends)
p = subprocess.Popen([
'docker', 'build', '-t', 'tritonserver_build', '-f',
os.path.join(FLAGS.build_dir, 'Dockerfile.build'), '.'
Expand Down Expand Up @@ -1248,8 +1272,8 @@ def container_build(images, backends, repoagents, endpoints):
if cver is None:
if FLAGS.version not in TRITON_VERSION_MAP:
fail(
'unable to determine default repo-tag, container version not known for {}'.format(
FLAGS.version))
'unable to determine default repo-tag, container version not known for {}'
.format(FLAGS.version))
cver = TRITON_VERSION_MAP[FLAGS.version][0]
if not cver.endswith('dev'):
default_repo_tag = 'r' + cver
Expand All @@ -1258,16 +1282,9 @@ def container_build(images, backends, repoagents, endpoints):
# For other versions use the TRITON_VERSION_MAP unless explicitly
# given.
if not FLAGS.no_container_build:
if FLAGS.container_version is None:
if FLAGS.version not in TRITON_VERSION_MAP:
fail('container version not known for {}'.format(FLAGS.version))
FLAGS.container_version = TRITON_VERSION_MAP[FLAGS.version][0]
if FLAGS.upstream_container_version is None:
if FLAGS.version not in TRITON_VERSION_MAP:
fail('upstream container version not known for {}'.format(
FLAGS.version))
FLAGS.upstream_container_version = TRITON_VERSION_MAP[
FLAGS.version][1]
FLAGS.container_version, FLAGS.upstream_container_version = get_container_versions(
FLAGS.version, FLAGS.container_version,
FLAGS.upstream_container_version)

log('container version {}'.format(FLAGS.container_version))
log('upstream container version {}'.format(
Expand Down
Loading