# syntax=docker/dockerfile:1.2
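# Nightly Merlin image: extends an existing Merlin base container and reinstalls the
# Merlin libraries from the tip of their GitHub repositories. VERSION and CONTAINER
# select which nvcr.io/nvstaging/merlin base image to build on.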
ARG VERSION=latest
ARG CONTAINER=merlin-hugectr
ARG FULL_IMAGE=nvcr.io/nvstaging/merlin/${CONTAINER}:${VERSION}
FROM ${FULL_IMAGE} as current
ARG CONTAINER
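# ARGs declared before FROM are not visible after it; re-declaring CONTAINER here
# makes it available to the RUN steps below.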
# Add Merlin Repo
RUN rm -rf /Merlin && git clone --depth 1 https://github.com/NVIDIA-Merlin/Merlin/ /Merlin && \
cd /Merlin/ && pip install . --no-deps
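# Each library below is cloned at the tip of its default branch and installed with
# --no-deps; runtime dependencies are expected to already be present in the base image.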
# Install Merlin Core
RUN rm -rf /core && git clone --depth 1 https://github.com/NVIDIA-Merlin/core.git /core/ && \
cd /core/ && pip install . --no-deps
# Install Merlin Dataloader
RUN rm -rf /dataloader && git clone --depth 1 https://github.com/NVIDIA-Merlin/dataloader.git /dataloader/ && \
cd /dataloader/ && pip install . --no-deps
# Install NVTabular
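# Force the pure-Python protobuf implementation (this ENV persists into the final image),
# presumably to avoid conflicts with the compiled protobuf extension shipped in the container.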
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python'
RUN rm -rf /nvtabular && git clone --depth 1 https://github.com/NVIDIA-Merlin/NVTabular.git /nvtabular/ && \
cd /nvtabular/ && pip install . --no-deps
# Install Merlin Systems
RUN rm -rf /systems && git clone --depth 1 https://github.com/NVIDIA-Merlin/systems.git /systems/ && \
cd /systems/ && pip install . --no-deps
# Install Models
RUN rm -rf /models && git clone --depth 1 https://github.com/NVIDIA-Merlin/Models.git /models/ && \
cd /models/ && pip install . --no-deps
# Install Transformers4Rec
RUN rm -rf /transformers4rec && git clone --depth 1 https://github.com/NVIDIA-Merlin/Transformers4Rec.git /transformers4rec && \
cd /transformers4rec/ && pip install . --no-deps
# -----------------------------------------------------------------------------
# HugeCTR + Dependencies
# Optional dependency: build HugeCTR with HDFS support (INSTALL_HDFS=true adds -DENABLE_HDFS=ON).
ARG INSTALL_HDFS=false
ARG _HUGECTR_BACKEND_REPO="github.com/triton-inference-server/hugectr_backend.git"
ARG HUGECTR_DEV_MODE=false
ARG _HUGECTR_REPO="github.com/NVIDIA-Merlin/HugeCTR.git"
ARG _CI_JOB_TOKEN=""
ARG HUGECTR_VER=main
ARG HUGECTR_BACKEND_VER=main
ARG HUGECTR_HOME=/usr/local/hugectr
ARG INSTALL_DISTRIBUTED_EMBEDDINGS=true
ARG TFDE_VER=v0.2
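# HUGECTR_DEV_MODE=true skips the clone-and-build steps below; INSTALL_HDFS toggles
# -DENABLE_HDFS=ON in the HugeCTR builds; INSTALL_DISTRIBUTED_EMBEDDINGS controls the
# optional distributed-embeddings build in the TensorFlow image, pinned to tag TFDE_VER.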
RUN if [[ ${CONTAINER} == *"hugectr" ]]; then \
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; \
fi
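# The libcuda.so.1 stub symlink above lets the linker resolve libcuda while building in a
# container without a GPU driver; it is removed again at the end of the HugeCTR build step.
# The Hadoop/HDFS environment set up below assumes the base image ships Hadoop at
# /opt/hadoop and is only exercised when building with INSTALL_HDFS=true.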
RUN if [[ ${CONTAINER} == *"hugectr" ]]; then \
export HADOOP_HOME=/opt/hadoop && \
export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin \
HDFS_NAMENODE_USER=root \
HDFS_SECONDARYNAMENODE_USER=root \
HDFS_DATANODE_USER=root \
YARN_RESOURCEMANAGER_USER=root \
YARN_NODEMANAGER_USER=root \
LIBHDFS_OPTS='-Djdk.lang.processReaperUseDefaultStackSize=true' \
UCX_ERROR_SIGNALS='' \
CLASSPATH=${CLASSPATH}:\
${HADOOP_HOME}/etc/hadoop/*:\
${HADOOP_HOME}/share/hadoop/common/*:\
${HADOOP_HOME}/share/hadoop/common/lib/*:\
${HADOOP_HOME}/share/hadoop/hdfs/*:\
${HADOOP_HOME}/share/hadoop/hdfs/lib/*:\
${HADOOP_HOME}/share/hadoop/mapreduce/*:\
${HADOOP_HOME}/share/hadoop/yarn/*:\
${HADOOP_HOME}/share/hadoop/yarn/lib/*; \
rm -rf /hugectr && \
if [[ "${HUGECTR_DEV_MODE}" == "false" ]]; then \
git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
cd /hugectr && \
git submodule update --init --recursive && \
mkdir build && \
cd build && \
if [[ "${INSTALL_HDFS}" == "false" ]]; then \
cmake -DCMAKE_BUILD_TYPE=Release -DSM="60;61;70;75;80" -DENABLE_INFERENCE=ON .. \
; else \
cmake -DCMAKE_BUILD_TYPE=Release -DSM="60;61;70;75;80" -DENABLE_INFERENCE=ON -DENABLE_HDFS=ON ..; \
fi && \
make -j$(nproc) && \
make install && \
rm -rf ./* && \
chmod +x ${HUGECTR_HOME}/bin/* ${HUGECTR_HOME}/lib/*.so; \
fi && \
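# Second build: training libraries with multi-node support (-DENABLE_MULTINODES=ON),
# followed by the HugeCTR-to-ONNX converter.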
if [[ "${HUGECTR_DEV_MODE}" == "false" ]]; then \
cd /hugectr && \
git submodule update --init --recursive && \
rm -rf build && \
mkdir build && \
cd build && \
LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH && \
if [[ "${INSTALL_HDFS}" == "false" ]]; then \
cmake -DCMAKE_BUILD_TYPE=Release -DSM="60;61;70;75;80" -DENABLE_MULTINODES=ON .. \
; else \
cmake -DCMAKE_BUILD_TYPE=Release -DSM="60;61;70;75;80" -DENABLE_MULTINODES=ON -DENABLE_HDFS=ON ..; \
fi && \
make -j$(nproc) && \
make install && \
rm -rf ./* && \
chmod +x ${HUGECTR_HOME}/bin/* ${HUGECTR_HOME}/lib/*.so && \
cd ../onnx_converter && \
python setup.py install; \
fi && \
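# Build the Triton hugectr backend against the Triton branches matching TRTOSS_VERSION
# (assumed to be set by the base image) and install it under ${HUGECTR_HOME}/backends.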
if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \
git clone --branch ${HUGECTR_BACKEND_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_BACKEND_REPO} /repos/hugectr_triton_backend && \
mkdir /repos/hugectr_triton_backend/build && \
cd /repos/hugectr_triton_backend/build && \
cmake \
-DCMAKE_INSTALL_PREFIX:PATH=${HUGECTR_HOME} \
-DTRITON_COMMON_REPO_TAG="r${TRTOSS_VERSION}" \
-DTRITON_CORE_REPO_TAG="r${TRTOSS_VERSION}" \
-DTRITON_BACKEND_REPO_TAG="r${TRTOSS_VERSION}" .. && \
make -j$(nproc) && \
make install && \
cd ../.. && \
rm -rf hugectr_triton_backend && \
chmod +x ${HUGECTR_HOME}/lib/*.so ${HUGECTR_HOME}/backends/hugectr/*.so; \
fi && \
ln -s ${HUGECTR_HOME}/backends/hugectr /opt/tritonserver/backends/hugectr && \
rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
rm -rf /repos /usr/local/share/jupyter/lab/staging/node_modules/marked \
/usr/local/share/jupyter/lab/staging/node_modules/node-fetch; \
fi
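# TensorFlow image: build SOK (sparse_operation_kit) and the HPS TensorFlow plugin (hps_tf)
# from the HugeCTR sources against the TensorFlow stack in the container.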
RUN if [[ ${CONTAINER} == *"tensorflow" ]]; then \
export LD_LIBRARY_PATH=/usr/local/hugectr/lib:$LD_LIBRARY_PATH \
LIBRARY_PATH=/usr/local/hugectr/lib:$LIBRARY_PATH \
SOK_COMPILE_UNIT_TEST=ON && \
rm -rf /hugectr /distributed_embeddings && \
if [ "$HUGECTR_DEV_MODE" == "false" ]; then \
git clone --branch ${HUGECTR_VER} --depth 1 https://${_HUGECTR_REPO} /hugectr && \
pushd /hugectr && \
pip install ninja && \
git submodule update --init --recursive && \
# Install SOK
cd sparse_operation_kit && \
python setup.py install && \
# Install HPS TF plugin
cd ../hps_tf && \
python setup.py install && \
popd; \
fi && \
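# Optionally build and install NVIDIA distributed-embeddings at tag ${TFDE_VER}.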
if [ "$INSTALL_DISTRIBUTED_EMBEDDINGS" == "true" ]; then \
git clone https://github.com/NVIDIA-Merlin/distributed-embeddings.git /distributed_embeddings/ && \
cd /distributed_embeddings && git checkout ${TFDE_VER} && git submodule update --init --recursive && \
make pip_pkg && pip install artifacts/*.whl && make clean; \
fi; \
fi
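# Disable any healthcheck inherited from the base image; the entrypoint stays the
# NVIDIA entrypoint script provided by the base image.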
HEALTHCHECK NONE
CMD ["/bin/bash"]
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]