forked from Azure/AzureML-Containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
166 lines (149 loc) · 5.03 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# Stage "inferencing-assets": source of the /artifacts tree that is copied
# into the final image by the COPY --from=inferencing-assets step below.
FROM mcr.microsoft.com/azureml/o16n-base/python-assets:20220331.v12 AS inferencing-assets
# Final stage: NVIDIA CUDA development image.
# Tag: cuda:11.2.2-cudnn8-devel-ubuntu20.04
# Env: CUDA_VERSION=11.2.2 (per the tag; set by the base image)
# NOTE(review): this header previously described the 11.1.1 tag and listed
# NCCL_VERSION=2.8.4 and CUDNN_VERSION=8.0.5.39 - confirm both values against
# the 11.2.2-cudnn8 image before relying on them.
FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04
# All following build steps run as root.
USER root:root

# NVIDIA-namespaced variables; $CUDA_VERSION is not defined in this file and
# is presumably inherited from the base image.
ENV com.nvidia.cuda.version=$CUDA_VERSION \
    com.nvidia.volumes.needed=nvidia_driver

# Locale and non-interactive apt/debconf behaviour.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    DEBIAN_FRONTEND=noninteractive

# Append the CUDA and CUPTI library directories to the loader search path.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

# NCCL logging level and Horovod all-reduce backend selection.
ENV NCCL_DEBUG=INFO \
    HOROVOD_GPU_ALLREDUCE=NCCL
# Install Common Dependencies
# One layer, three apt-get install passes (only the first pass uses
# --no-install-recommends), followed by removal of the apt caches and
# package lists. Packages are listed alphabetically within each pass.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
# SSH and RDMA
        dapl2-utils \
        iproute2 \
        libdapl2 \
        libibverbs1 \
        libmlx4-1 \
        libmlx5-1 \
        libmthca1 \
        librdmacm1 \
        openssh-client \
        openssh-server \
        redis \
# rdma-core dependencies
    && apt-get install -y \
        cython3 \
        dh-python \
        gcc \
        libnl-3-dev \
        libnl-route-3-dev \
        libudev-dev \
        ninja-build \
        pandoc \
        pkg-config \
        python3-dev \
        python3-docutils \
        udev \
        valgrind \
# Others
    && apt-get install -y \
        binutils-multiarch \
        build-essential \
        bzip2 \
        cpio \
        dh-make \
        fuse \
        git \
        ibutils \
        ibverbs-utils \
        infiniband-diags \
        libbz2-1.0 \
        libgcrypt20 \
        libglib2.0-0 \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        libsm6 \
        libssl1.1 \
        libx11-dev \
        libxext6 \
        libxrender-dev \
        nginx \
        pciutils \
        perftest \
        rdmacm-utils \
        systemd \
        wget \
    && apt-get clean -y \
    && rm -rf /var/lib/apt/lists/*
# Inference
# Pull the /artifacts tree from the inferencing-assets stage into /var
# (logging utilities, nginx and rsyslog configuration files, IOT server binary, etc.).
COPY --from=inferencing-assets /artifacts /var/

# Run the bundled system-requirements installer, put the shipped rsyslog and
# nginx configuration in place, enable the "app" nginx site and remove the
# default site link.
RUN /var/requirements/install_system_requirements.sh \
    && cp /var/configuration/rsyslog.conf /etc/rsyslog.conf \
    && cp /var/configuration/nginx.conf /etc/nginx/sites-available/app \
    && ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/app \
    && rm -f /etc/nginx/sites-enabled/default

ENV SVDIR=/var/runit \
    WORKER_TIMEOUT=300

EXPOSE 5001 8883 8888
# Conda Environment
# Installs Miniconda ${MINICONDA_VERSION} into /opt/miniconda and puts its bin
# directory first on PATH.
ENV MINICONDA_VERSION=py37_4.9.2
ENV PATH=/opt/miniconda/bin:$PATH
# The installer runs in batch mode (-b) and forces installation into the
# prefix (-f). Package caches, the installer script and byte-code caches are
# removed in the same layer so they do not add to the image size.
#
# The __pycache__ sweep stays on the root filesystem (-xdev), so /proc and /sys
# are not walked. It does not descend into matched directories (-prune), and it
# hands paths to rm directly (-exec ... +) instead of piping through xargs, so
# paths containing whitespace are handled and a failure is not hidden by a pipe.
RUN wget -qO /tmp/miniconda.sh "https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" \
    && bash /tmp/miniconda.sh -bf -p /opt/miniconda \
    && conda clean -ay \
    && rm -rf /opt/miniconda/pkgs \
    && rm /tmp/miniconda.sh \
    && find / -xdev -type d -name __pycache__ -prune -exec rm -rf {} +
# Open-MPI-UCX installation
# Downloads the UCX 1.9.0 release tarball, builds it with one make job per
# processor reported by `nproc --all`, installs it under /usr/local and
# removes the build tree in the same layer.
RUN mkdir /tmp/ucx \
    && cd /tmp/ucx \
    && wget -q https://github.com/openucx/ucx/releases/download/v1.9.0/ucx-1.9.0.tar.gz \
    && tar zxf ucx-1.9.0.tar.gz \
    && cd ucx-1.9.0 \
    && ./configure \
        --prefix=/usr/local \
        --enable-optimizations \
        --disable-assertions \
        --disable-params-check \
        --enable-mt \
    && make -j "$(nproc --all)" \
    && make install \
    && rm -rf /tmp/ucx
# Open-MPI installation
# Builds Open MPI ${OPENMPI_VERSION} against the UCX installed under
# /usr/local, installs it, refreshes the dynamic linker cache and removes the
# build tree in the same layer.
ENV OPENMPI_VERSION=4.1.0
# The release-series directory in the download URL (e.g. "v4.1") is derived
# from OPENMPI_VERSION by stripping the last ".<patch>" component in the shell,
# so bumping the version variable alone keeps the URL consistent.
# wget runs quietly (-q), matching the other downloads in this file.
RUN mkdir /tmp/openmpi \
    && cd /tmp/openmpi \
    && wget -q "https://download.open-mpi.org/release/open-mpi/v${OPENMPI_VERSION%.*}/openmpi-${OPENMPI_VERSION}.tar.gz" \
    && tar zxf "openmpi-${OPENMPI_VERSION}.tar.gz" \
    && cd "openmpi-${OPENMPI_VERSION}" \
    && ./configure \
        --with-ucx=/usr/local/ \
        --enable-mca-no-build=btl-uct \
        --enable-orterun-prefix-by-default \
    && make -j "$(nproc)" all \
    && make install \
    && ldconfig \
    && rm -rf /tmp/openmpi
# Msodbcsql17 installation
# Registers the Microsoft package signing key and the Ubuntu 20.04 "prod"
# repository, then installs msodbcsql17 and unixodbc-dev (ACCEPT_EULA=Y is
# passed to that install command).
#
# Downloads use `curl -fsSL ... -o <file>` instead of a pipe or shell
# redirect: -f makes curl fail on HTTP errors, and with no pipeline a failed
# download stops the build instead of feeding empty input to apt-key or
# writing an error page into the sources list.
# The apt package lists are removed at the end of this same layer so they do
# not remain in the image.
RUN apt-get update \
    && apt-get install -y curl \
    && curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc \
    && apt-key add /tmp/microsoft.asc \
    && rm -f /tmp/microsoft.asc \
    && curl -fsSL https://packages.microsoft.com/config/ubuntu/20.04/prod.list -o /etc/apt/sources.list.d/mssql-release.list \
    && apt-get update \
    && ACCEPT_EULA=Y apt-get install -y msodbcsql17 unixodbc-dev \
    && rm -rf /var/lib/apt/lists/*
# CMake installation
# Refreshes the apt package lists and installs cmake from the distribution
# repositories.
# NOTE(review): the package lists are left in place by this layer; check the
# later build steps before adding a cleanup here.
RUN apt-get update \
    && apt-get install -y cmake
# rdma-core v30.0 for Mlnx_ofed_5_1_2 as user space driver
# Clones the v30.0 branch/tag, runs the `binary` target of debian/rules,
# installs every .deb left in the parent directory with dpkg and removes the
# working tree in the same layer.
RUN mkdir /tmp/rdma-core \
    && cd /tmp/rdma-core \
    && git clone --branch v30.0 https://github.com/linux-rdma/rdma-core \
    && cd /tmp/rdma-core/rdma-core \
    && debian/rules binary \
    && dpkg -i ../*.deb \
    && rm -rf /tmp/rdma-core
# Install nccl-rdma-sharp-plugins (pinned to v2.1.0)
# Builds the plugin against the CUDA toolkit in /usr/local/cuda, without UCX,
# and installs it under /usr/local/nccl-rdma-sharp-plugins.
#
# - apt-get is used instead of apt, whose command-line interface is not meant
#   for scripts.
# - apt-get update runs in this same layer, so the install does not depend on
#   package lists left behind by an earlier (possibly cached) layer; the lists
#   are removed again straight after the install.
# - The cloned source tree is deleted after `make install`, so build
#   leftovers do not stay in /tmp of the final image.
RUN apt-get update \
    && apt-get install -y dh-make zlib1g-dev \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p /usr/local/nccl-rdma-sharp-plugins \
    && cd /tmp \
    && git clone -b v2.1.0 https://github.com/Mellanox/nccl-rdma-sharp-plugins.git \
    && cd nccl-rdma-sharp-plugins \
    && ./autogen.sh \
    && ./configure \
        --prefix=/usr/local/nccl-rdma-sharp-plugins \
        --with-cuda=/usr/local/cuda \
        --without-ucx \
    && make \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/nccl-rdma-sharp-plugins
# Append the nccl-rdma-sharp-plugins lib directory to the loader search path
# so the NCCL RDMA plugins can be found inside this container.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nccl-rdma-sharp-plugins/lib"