Skip to content

Commit 0a1b8b8

Browse files
akihironitta, Borda, and carmocca
authored
Fix horovod installation base-cuda Dockerfile (#11811)
* pip install --user
* add checks
* rm unrelated comment
* consistent format
* Fail if horovod not found

Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
1 parent 8d23f62 commit 0a1b8b8

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

dockers/base-conda/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,5 @@ RUN \
129129
conda info && \
130130
pip list && \
131131
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
132-
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__"
132+
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
133+
python -c "import horovod.torch"

dockers/base-cuda/Dockerfile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ ENV \
7575
COPY ./requirements.txt requirements.txt
7676
COPY ./requirements/ ./requirements/
7777

78-
# conda init
7978
RUN \
8079
wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \
8180
python${PYTHON_VERSION} get-pip.py && \
@@ -88,11 +87,11 @@ RUN \
8887
python ./requirements/adjust_versions.py requirements/extra.txt ${PYTORCH_VERSION} && \
8988
python ./requirements/adjust_versions.py requirements/examples.txt ${PYTORCH_VERSION} && \
9089
# Install all requirements
91-
pip install -r requirements/devel.txt --no-cache-dir && \
90+
pip install --user -r requirements/devel.txt --no-cache-dir && \
9291
rm -rf requirements.* requirements/
9392

9493
RUN \
95-
CUDA_VERSION_MAJOR=$(python -c "import torch ; print(torch.version.cuda.split('.')[0])") && \
94+
CUDA_VERSION_MAJOR=$(python -c "import torch; print(torch.version.cuda.split('.')[0])") && \
9695
py_ver=$(python -c "print(int('$PYTHON_VERSION'.split('.') >= '3.9'.split('.')))") && \
9796
# install DALI, needed for examples
9897
# todo: waiting for 1.4 - https://github.com/NVIDIA/DALI/issues/3144#issuecomment-877386691
@@ -108,15 +107,18 @@ RUN \
108107

109108
RUN \
110109
# install FairScale
111-
pip install fairscale==0.4.0
110+
pip install fairscale==0.4.0 && \
111+
python -c "import fairscale; print(fairscale.__version__)"
112112

113113
RUN \
114114
# install DeepSpeed
115-
pip install deepspeed==0.5.7
115+
pip install deepspeed==0.5.7 && \
116+
python -c "import deepspeed; print(deepspeed.__version__)"
116117

117118
RUN \
118119
# Show what we have
119120
pip --version && \
120121
pip list && \
121122
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
122-
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__"
123+
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
124+
python -c "import horovod.torch"

0 commit comments

Comments
 (0)