From cc26208d92b820ebff0acbe63577f2bd749450e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonhyung=20Lee/=EC=9D=B4=EC=A4=80=ED=98=95?= <33523965+veritas9872@users.noreply.github.com> Date: Mon, 1 May 2023 10:59:41 +0900 Subject: [PATCH] Fix some serious bugs in the NGC image (#129) * Fix bug in the NGC Docker image where the year and month variables were not being passed from the docker-compose.yaml file. * Update default NGC image to the one released in April 2023. * Remove redundant `pytest` requirement. * Fix serious bug where `--ignore-installed` was mistaken for an option to always leave existing packages as-is, which unfortunately does not exist in `pip`. The default behavior is to update only if version incompatibilities are found. * Reformat the README.md file. --- README.md | 6 +++--- docker-compose.yaml | 2 +- dockerfiles/ngc.Dockerfile | 10 +++++++--- reqs/ngc-pip.requirements.txt | 4 +++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2af56ab..5622057 100644 --- a/README.md +++ b/README.md @@ -498,7 +498,7 @@ which is useful if `sudo` permissions are unavailable on the host. Also, when one user switches between multiple Cresset-based containers on a single machine, VSCode may not be able to find the container workspace. This is because the `docker-compose.yaml` file mounts the host's - `~/.vscode-server` directory to the `/home/${USR}/.vscode-server` directory +`~/.vscode-server` directory to the `/home/${USR}/.vscode-server` directory of all containers to preserve VSCode extensions between containers. To fix this issue, create a new directory on the host to mount the containers' `.vscode-server` directories. @@ -529,8 +529,8 @@ For other VSCode problems, try deleting `~/.vscode-server` on the host. networking issues during installation. Updating git submodules is [not fail-safe](https://stackoverflow.com/a/8573310/9289275). -4. `torch.cuda.is_available()` will return a - `... 
UserWarning: CUDA initialization:...` +4. `torch.cuda.is_available()` will return a + `... UserWarning: CUDA initialization:...` error or the image will simply not start if the host CUDA driver is incompatible with the CUDA version on the Docker image. Either upgrade the host CUDA driver or downgrade the CUDA version of the image. diff --git a/docker-compose.yaml b/docker-compose.yaml index 43398b7..289e75c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -161,7 +161,7 @@ services: dockerfile: dockerfiles/ngc.Dockerfile args: NGC_YEAR: ${NGC_YEAR:-23} - NGC_MONTH: ${NGC_MONTH:-03} + NGC_MONTH: ${NGC_MONTH:-04} hub: # Service based on the official PyTorch Docker images from Docker Hub. extends: # Available images: https://hub.docker.com/r/pytorch/pytorch/tags diff --git a/dockerfiles/ngc.Dockerfile b/dockerfiles/ngc.Dockerfile index 9b8f4dc..0d33ce1 100644 --- a/dockerfiles/ngc.Dockerfile +++ b/dockerfiles/ngc.Dockerfile @@ -1,9 +1,11 @@ # syntax = docker/dockerfile:1 # The top line is used by BuildKit. _**DO NOT ERASE IT**_. +ARG NGC_YEAR +ARG NGC_MONTH ARG INTERACTIVE_MODE ARG GIT_IMAGE=bitnami/git:latest -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:${NGC_YEAR:-23}.${NGC_MONTH:-03}-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:${NGC_YEAR}.${NGC_MONTH}-py3 ######################################################################## FROM ${GIT_IMAGE} AS stash @@ -45,11 +47,13 @@ RUN --mount=type=bind,from=stash,source=/tmp/apt,target=/tmp/apt \ rm -rf /var/lib/apt/lists/* # Use `sudo` to install new `pip` packages during development if necessary. -# Previous installations are preserved via the `--ignore-installed` flag. +# Note that new `pip` packages may overwrite existing packages if incompatible. +# Check the installed packages before and after `pip` installation and minimize +# the number of requirements to keep overwriting to a minimum. 
ARG PIP_CACHE_DIR=/root/.cache/pip RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \ --mount=type=bind,from=stash,source=/tmp/req,target=/tmp/req \ - python -m pip install --ignore-installed -r /tmp/req/requirements.txt && ldconfig + python -m pip install -r /tmp/req/requirements.txt && ldconfig # Enable Intel MKL optimizations on AMD CPUs. # https://danieldk.eu/Posts/2020-08-31-MKL-Zen.html diff --git a/reqs/ngc-pip.requirements.txt b/reqs/ngc-pip.requirements.txt index b29a837..207b63f 100644 --- a/reqs/ngc-pip.requirements.txt +++ b/reqs/ngc-pip.requirements.txt @@ -1,2 +1,4 @@ +# Pre-existing Python packages may be overwritten by new packages. +# Minimize the number of requirements and check the installed packages +# before and after `pip` installation to find any discrepancies. hydra-core -pytest