Add CPU only to compose script (#3365)
- Enabled the compose script to compose CPU-only containers.
- Fixed upstream-version confusion so that the compose script works on all branches.
- Added documentation.
jbkyang-nvi authored Sep 15, 2021
1 parent aa05498 commit 0801098
Showing 3 changed files with 153 additions and 56 deletions.
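In practice, this change makes the `--enable-gpu` flag optional: dropping it composes a CPU-only image. A representative invocation (the backend choice here is illustrative; see the documentation changes below):

```
python3 compose.py --backend onnxruntime --repoagent checksum
```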
4 changes: 3 additions & 1 deletion build.py
@@ -742,6 +742,7 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,


def dockerfile_prepare_container_linux(argmap, backends, enable_gpu):
gpu_enabled = 1 if enable_gpu else 0
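# gpu_enabled is baked into the image via the ENV line added below so that
# compose.py can later distinguish GPU builds from CPU-only builds.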
# Common steps to produce docker images shared by build.py and compose.py.
# Sets environment variables, installs dependencies and adds entrypoint
df = '''
@@ -760,6 +761,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu):
ENV TF_ADJUST_SATURATION_FUSED 1
ENV TF_ENABLE_WINOGRAD_NONFUSED 1
ENV TF_AUTOTUNE_THRESHOLD 2
ENV TRITON_SERVER_GPU_ENABLED {gpu_enabled}
# Create a user that can be used to run triton as
# non-root. Make sure that this user is given ID 1000. All server
@@ -788,7 +790,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu):
curl \
{ort_dependencies} && \
rm -rf /var/lib/apt/lists/*
'''.format(ort_dependencies=ort_dependencies)
'''.format(gpu_enabled=gpu_enabled, ort_dependencies=ort_dependencies)

if enable_gpu:
df += install_dcgm_libraries(argmap['DCGM_VERSION'])
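The `TRITON_SERVER_GPU_ENABLED` variable introduced above is what lets `compose.py` tell GPU and CPU-only full images apart. A minimal standalone sketch of that readback, assuming Docker is installed locally and `image` names an already-pulled image:

```
import subprocess

def image_gpu_enabled(image):
    # Print every ENV entry of the image, one per line.
    out = subprocess.run(
        ['docker', 'inspect', '-f',
         '{{range .Config.Env}}{{println .}}{{end}}', image],
        capture_output=True, text=True, check=True).stdout
    for line in out.splitlines():
        if line.startswith('TRITON_SERVER_GPU_ENABLED='):
            return line.split('=', 1)[1] == '1'
    # Images built before this commit predate the flag; fall back to the
    # CUDA_VERSION heuristic that compose.py also applies.
    return any(l.startswith('CUDA_VERSION=') for l in out.splitlines())
```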
148 changes: 97 additions & 51 deletions compose.py
@@ -56,7 +56,7 @@ def fail_if(p, msg):
fail(msg)


def start_gpu_dockerfile(ddir, images, argmap, dockerfile_name, backends):
def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
# Set environment variables, set default user and install dependencies
df = '''
#
@@ -106,15 +106,16 @@ def add_requested_repoagents(ddir, dockerfile_name, repoagents):
for ra in repoagents:
df += '''COPY --chown=1000:1000 --from=full /opt/tritonserver/repoagents/{} /opt/tritonserver/repoagents/{}
'''.format(ra, ra)
df += '''
if len(repoagents) > 0:
df += '''
# Top-level /opt/tritonserver/repoagents not copied so need to explicitly set permissions here
RUN chown triton-server:triton-server /opt/tritonserver/repoagents
'''
with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
dfile.write(df)


def end_gpu_dockerfile(ddir, dockerfile_name, argmap):
def end_dockerfile(ddir, dockerfile_name, argmap):
# Install additional dependencies
df = ""
if argmap['SAGEMAKER_ENDPOINT']:
@@ -140,51 +141,90 @@ def get_container_version_if_not_specified():
with open('TRITON_VERSION', "r") as vfile:
version = vfile.readline().strip()
import build
FLAGS.container_version, upstream_container_version = build.get_container_versions(
version, FLAGS.container_version, "")
current_container_version, FLAGS.container_version = build.get_container_versions(
version, None, FLAGS.container_version)
log('version {}'.format(version))
log('using container version {}'.format(FLAGS.container_version))


def create_argmap(images):
# Extract information from the upstream build and create a map other
# functions can use
upstreamDockerImage = images["full"]
full_docker_image = images["full"]
min_docker_image = images["min"]
enable_gpu = FLAGS.enable_gpu
# Docker inspect environment variables
base_run_args = ['docker', 'inspect', '-f']
import re # parse all PATH environment variables

# first pull docker image
log("pulling container:{}".format(upstreamDockerImage))
p = subprocess.run(['docker', 'pull', upstreamDockerImage])
# first pull docker images
log("pulling container:{}".format(full_docker_image))
p = subprocess.run(['docker', 'pull', full_docker_image])
fail_if(
p.returncode != 0,
'docker pull container {} failed, {}'.format(upstreamDockerImage,
'docker pull container {} failed, {}'.format(full_docker_image,
p.stderr))

baseRunArgs = ['docker', 'inspect', '-f']
p_version = subprocess.run(baseRunArgs + [
if enable_gpu:
pm = subprocess.run(['docker', 'pull', min_docker_image])
fail_if(
pm.returncode != 0, 'docker pull container {} failed, {}'.format(
min_docker_image, pm.stderr))
pm_path = subprocess.run(base_run_args + [
'{{range $index, $value := .Config.Env}}{{$value}} {{end}}',
min_docker_image
],
capture_output=True,
text=True)
fail_if(
pm_path.returncode != 0,
'docker inspect to find triton environment variables for min container failed, {}'
.format(pm_path.stderr))
# min container needs to be GPU enabled if the build is a GPU build
vars = pm_path.stdout
e = re.search("CUDA_VERSION", vars)
gpu_enabled = False if e == None else True
fail_if(
not gpu_enabled,
'\'enable-gpu\' flag specified but min container provided does not have CUDA installed'
)

# Check full container environment variables
p_path = subprocess.run(base_run_args + [
'{{range $index, $value := .Config.Env}}{{$value}} {{end}}',
upstreamDockerImage
full_docker_image
],
capture_output=True,
text=True)
vars = p_version.stdout
capture_output=True,
text=True)
fail_if(
p_path.returncode != 0,
'docker inspect to find environment variables for full container failed, {}'
.format(p_path.stderr))
vars = p_path.stdout
log_verbose("inspect args: {}".format(vars))
import re # parse all PATH environment variables

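# Determine whether the full image was built with GPU support: newer images
# carry TRITON_SERVER_GPU_ENABLED explicitly, while older GPU images are
# recognized by the presence of CUDA_VERSION.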
e0 = re.search("TRITON_SERVER_GPU_ENABLED=([\S]{1,}) ", vars)
e1 = re.search("CUDA_VERSION", vars)
gpu_enabled = False
if (e0 != None):
gpu_enabled = e0.group(1) == "1"
elif (e1 != None):
gpu_enabled = True
fail_if(
gpu_enabled != enable_gpu,
'Error: full container provided was built with \'enable_gpu\' as {} and you are composing a container with \'enable_gpu\' as {}'
.format(gpu_enabled, enable_gpu))
e = re.search("TRITON_SERVER_VERSION=([\S]{6,}) ", vars)
version = "" if e == None else e.group(1)
fail_if(
p_version.returncode != 0 or len(version) == 0,
'docker inspect to find triton version failed, {}'.format(
p_version.stderr))

vars = p_version.stdout
len(version) == 0,
'docker inspect to find triton server version failed, {}'.format(
p_path.stderr))
e = re.search("NVIDIA_TRITON_SERVER_VERSION=([\S]{5,}) ", vars)
container_version = "" if e == None else e.group(1)
fail_if(
len(container_version) == 0,
'docker inspect to find triton container version failed, {}'.format(
vars))

vars = p_version.stdout
dcgm_ver = re.search("DCGM_VERSION=([\S]{4,}) ", vars)
dcgm_version = ""
if dcgm_ver == None:
@@ -197,27 +237,27 @@ def create_argmap(images):
len(dcgm_version) == 0,
'docker inspect to find DCGM version failed, {}'.format(vars))

p_sha = subprocess.run(baseRunArgs + [
'{{ index .Config.Labels "com.nvidia.build.ref"}}', upstreamDockerImage
],
capture_output=True,
text=True)
p_sha = subprocess.run(
base_run_args +
['{{ index .Config.Labels "com.nvidia.build.ref"}}', full_docker_image],
capture_output=True,
text=True)
fail_if(
p_sha.returncode != 0,
'docker inspect of upstream docker image build sha failed, {}'.format(
p_sha.stderr))
p_build = subprocess.run(baseRunArgs + [
'{{ index .Config.Labels "com.nvidia.build.id"}}', upstreamDockerImage
],
capture_output=True,
text=True)
p_build = subprocess.run(
base_run_args +
['{{ index .Config.Labels "com.nvidia.build.id"}}', full_docker_image],
capture_output=True,
text=True)
fail_if(
p_build.returncode != 0,
'docker inspect of upstream docker image build id failed, {}'.format(
p_build.stderr))

p_find = subprocess.run(
['docker', 'run', upstreamDockerImage, 'bash', '-c', 'ls /usr/bin/'],
['docker', 'run', full_docker_image, 'bash', '-c', 'ls /usr/bin/'],
capture_output=True,
text=True)
f = re.search("serve", p_find.stdout)
@@ -298,10 +338,6 @@ def create_argmap(images):
)

FLAGS = parser.parse_args()
fail_if(
not FLAGS.enable_gpu,
"Only GPU versions are supported right now. Add --enable-gpu to compose.py command."
)

if FLAGS.work_dir is None:
FLAGS.work_dir = "."
@@ -329,24 +365,34 @@ def create_argmap(images):
images[parts[0]] = parts[1]
else:
get_container_version_if_not_specified()
images = {
"full":
"nvcr.io/nvidia/tritonserver:{}-py3".format(
FLAGS.container_version),
"min":
"nvcr.io/nvidia/tritonserver:{}-py3-min".format(
FLAGS.container_version)
}
if (FLAGS.enable_gpu):
images = {
"full":
"nvcr.io/nvidia/tritonserver:{}-py3".format(
FLAGS.container_version),
"min":
"nvcr.io/nvidia/tritonserver:{}-py3-min".format(
FLAGS.container_version)
}
else:
images = {
"full":
"nvcr.io/nvidia/tritonserver:{}-cpu-only-py3".format(
FLAGS.container_version),
"min":
"ubuntu:20.04"
}
fail_if(
len(images) != 2,
"Need to both specify 'full' and 'min' images if at all")

argmap = create_argmap(images)

start_gpu_dockerfile(FLAGS.work_dir, images, argmap, dockerfile_name,
FLAGS.backend)
start_dockerfile(FLAGS.work_dir, images, argmap, dockerfile_name,
FLAGS.backend)
add_requested_backends(FLAGS.work_dir, dockerfile_name, FLAGS.backend)
add_requested_repoagents(FLAGS.work_dir, dockerfile_name, FLAGS.repoagent)
end_gpu_dockerfile(FLAGS.work_dir, dockerfile_name, argmap)
end_dockerfile(FLAGS.work_dir, dockerfile_name, argmap)

if (not FLAGS.dry_run):
build_docker_image(FLAGS.work_dir, dockerfile_name, FLAGS.output_name)
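Since the Dockerfile is written out before the final `build_docker_image` step, the compose flow can be previewed without building anything; the flag name below is inferred from `FLAGS.dry_run` above and should be treated as an assumption:

```
python3 compose.py --backend onnxruntime --dry-run
```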
57 changes: 53 additions & 4 deletions docs/compose.md
@@ -41,9 +41,18 @@ from source to get more exact customization.

## Use the compose.py script

The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server). Simply clone the repository and run `compose.py` to create a custom container. Note that the created container version will depend on the branch that was cloned. For example, branch [r21.06](https://github.com/triton-inference-server/server/tree/r21.06) should be used to create an image based on the NGC 21.06 Triton release.

`compose.py` provides the `--backend` and `--repoagent` options that allow you to specify which backends and repository agents to include in the custom image. The `--enable-gpu` flag indicates that you want to create an image that supports NVIDIA GPUs. For example, the following creates a new docker image that contains only the TensorFlow 1 and TensorFlow 2 backends and the checksum repository agent.
The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
Simply clone the repository and run `compose.py` to create a custom container.
Note: the created container version will depend on the branch that was cloned.
For example, branch [r21.08](https://github.com/triton-inference-server/server/tree/r21.08)
should be used to create an image based on the NGC 21.08 Triton release.

`compose.py` provides the `--backend` and `--repoagent` options that allow you to
specify which backends and repository agents to include in the custom image.
The `--enable-gpu` flag indicates that you want to create an image that supports
NVIDIA GPUs. For example, the following creates a new docker image that
contains only the TensorFlow 1 and TensorFlow 2 backends and the checksum
repository agent.

Example:
```
@@ -54,7 +63,47 @@ will provide a container `tritonserver` locally. You can access the container with
$ docker run -it tritonserver:latest
```

Note: If `compose.py` is run on release versions `r21.08` and older, the resulting container will have DCGM version 2.2.3 installed. This may result in different GPU statistic reporting behavior.
Note: If `compose.py` is run on release versions `r21.08` and earlier,
the resulting container will have DCGM version 2.2.3 installed.
This may result in different GPU statistic reporting behavior.
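To confirm which DCGM version a composed image carries, one option (a hypothetical check, assuming the image was tagged `tritonserver:latest` and that the `DCGM_VERSION` environment variable read by `compose.py` is carried over from the full container) is to grep the image environment:

```
$ docker inspect -f '{{range .Config.Env}}{{println .}}{{end}}' tritonserver:latest | grep DCGM_VERSION
```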

### Compose a specific version of Triton

`compose.py` requires two containers: a `min` container, which is the
base the composed container is built from, and a `full` container, from which
the script extracts components. The versions of the `min` and `full` containers
are determined by the branch of Triton that `compose.py` is on.
For example, running
```
python3 compose.py --backend tensorflow1 --repoagent checksum --enable-gpu
```
on branch [r21.08](https://github.com/triton-inference-server/server/tree/r21.08) pulls:
- `min` container `nvcr.io/nvidia/tritonserver:21.08-py3-min`
- `full` container `nvcr.io/nvidia/tritonserver:21.08-py3`

Alternatively, users can specify the version of the Triton container to pull from any branch by either:
1. Adding the flag `--container-version <container version>`
```
python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 21.08 --enable-gpu
```
2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
The user is responsible for specifying compatible `min` and `full` containers.
```
python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:21.08-py3-min --image full,nvcr.io/nvidia/tritonserver:21.08-py3 --enable-gpu
```
Methods 1 and 2 result in the same composed container. Furthermore, the `--image` flag overrides the `--container-version` flag when both are specified.

### CPU-only container composition

To compose a container built for CPU-only usage, simply remove the
`--enable-gpu` flag when running `compose.py` (see the example after the notes below).

This will build a container using the `ubuntu:20.04` Docker image as the `min` container
and `nvcr.io/nvidia/tritonserver:<upstream-container-version>-cpu-only-py3` as the `full` container.
Note:
1. When composing a CPU-only container, both the `min` and `full` containers should be built for CPU only and must not have CUDA installed.
2. CPU-only containers are only available for Triton versions > `21.09`.
3. CPU-only `full` containers are built with fewer backends than the GPU-enabled containers. The currently supported backends are `onnxruntime`, `openvino` and `python`.
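
For example, on a branch newer than r21.09 (the backend choice here is illustrative):
```
python3 compose.py --backend onnxruntime --repoagent checksum
```
This pulls `ubuntu:20.04` as the `min` container and the matching `-cpu-only-py3` image as the `full` container, then composes a CPU-only `tritonserver` image.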

## Build it yourself

