# Dockerfile for a fork of mozilla/DeepSpeech.
# Need devel version cause we need /usr/include/cudnn.h
# for compiling libctc_decoder_with_kenlm.so
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04

# >> START Install base software

# Get basic packages.
# Every apt layer in this section runs its own `apt-get update` and removes the
# package lists afterwards, so no layer depends on a (possibly stale, cached)
# index left behind by an earlier layer, and the lists do not bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash-completion \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        htop \
        libboost-all-dev \
        libbz2-dev \
        libcurl3-dev \
        liblzma-dev \
        libsox-dev \
        libsox-fmt-mp3 \
        locales \
        nano \
        openjdk-8-jdk \
        pkg-config \
        python3 \
        python3-dev \
        python3-numpy \
        python3-pip \
        python3-wheel \
        sox \
        swig \
        unzip \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to Python 3
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Install NCCL 2.3.7 (CUDA 10.0 build).
# --allow-downgrades / --allow-change-held-packages let apt replace whatever
# libnccl version the base image already has installed or holds.
RUN apt-get update \
    && apt-get install -qq -y --allow-downgrades --allow-change-held-packages \
        libnccl2=2.3.7-1+cuda10.0 \
        libnccl-dev=2.3.7-1+cuda10.0 \
    && rm -rf /var/lib/apt/lists/*

# Install Bazel 0.19.2.
# Download, install and delete the .deb in a single layer so the package file
# does not stay in the image; -f makes curl fail on an HTTP error instead of
# saving the error page as the .deb.
RUN curl -fL -o /tmp/bazel.deb \
        "https://github.com/bazelbuild/bazel/releases/download/0.19.2/bazel_0.19.2-linux-x86_64.deb" \
    && dpkg -i /tmp/bazel.deb \
    && rm /tmp/bazel.deb

# Install CUDA CLI Tools
RUN apt-get update \
    && apt-get install -qq -y cuda-command-line-tools-10-0 \
    && rm -rf /var/lib/apt/lists/*

# Install pip.
# This image builds against Python 3.6 (see PYTHON_BIN_PATH), which the
# unversioned https://bootstrap.pypa.io/get-pip.py no longer supports, so the
# version-specific bootstrap script is used instead.
RUN wget https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
    python3 get-pip.py --no-cache-dir && \
    rm get-pip.py

# << END Install base software
# >> START Configure Tensorflow Build

# Clone the r1.13 branch of Mozilla's TensorFlow fork into /tensorflow
# (clone and branch selection happen in one layer).
RUN git clone --branch r1.13 https://github.com/mozilla/tensorflow/ /tensorflow
WORKDIR /tensorflow

# The variables below are set with the `ENV key=value` form; the
# space-separated `ENV key value` form is deprecated.

# GPU Environment Setup
ENV TF_NEED_CUDA=1 \
    CUDA_TOOLKIT_PATH=/usr/local/cuda \
    TF_CUDA_VERSION=10.0 \
    TF_CUDNN_VERSION=7 \
    CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu/ \
    TF_CUDA_COMPUTE_CAPABILITIES=6.0 \
    TF_NCCL_VERSION=2.3
# ENV NCCL_INSTALL_PATH=/usr/lib/x86_64-linux-gnu/

# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE=GPU \
    TF_BUILD_OPTIONS=OPT \
    TF_BUILD_DISABLE_GCP=1 \
    TF_BUILD_ENABLE_XLA=0 \
    TF_BUILD_PYTHON_VERSION=PYTHON3 \
    TF_BUILD_IS_OPT=OPT \
    TF_BUILD_IS_PIP=PIP

# Other Parameters
# CC_OPT_FLAGS holds several space-separated compiler flags, so it must be quoted.
ENV CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \
    TF_NEED_GCP=0 \
    TF_NEED_HDFS=0 \
    TF_NEED_JEMALLOC=1 \
    TF_NEED_OPENCL=0 \
    TF_CUDA_CLANG=0 \
    TF_NEED_MKL=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_AWS=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_NGRAPH=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_NEED_TENSORRT=0 \
    TF_NEED_GDR=0 \
    TF_NEED_VERBS=0 \
    TF_NEED_OPENCL_SYCL=0 \
    PYTHON_BIN_PATH=/usr/bin/python3.6 \
    PYTHON_LIB_PATH=/usr/lib/python3.6/dist-packages

# << END Configure Tensorflow Build
# >> START Configure Bazel

# Running bazel inside a `docker build` command causes trouble, cf:
#   https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
# Similarly, we need to work around sandboxing issues:
#   https://github.com/bazelbuild/bazel/issues/418
# Both settings are appended to the system-wide bazelrc in a single layer.
RUN printf '%s\n' \
        "startup --batch" \
        "build --spawn_strategy=standalone --genrule_strategy=standalone" \
    >> /etc/bazel.bazelrc

# Put cuda libraries to where they are expected to be:
# symlink the NCCL library and header, the cuDNN header, and a libcuda.so.1
# alias for the libcuda stub under /usr/local/cuda.
# `mkdir -p` keeps the step from failing if the directory already exists.
RUN mkdir -p /usr/local/cuda/lib && \
    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
    ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h

# Set library paths (appended to whatever LD_LIBRARY_PATH is already set).
# Uses the `ENV key=value` form; the space-separated form is deprecated.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/

# << END Configure Bazel
# Bring the build context (the DeepSpeech repo contents) into the image
# at /DeepSpeech
COPY . /DeepSpeech/

# Alternative: clone from GitHub instead of copying the build context
# RUN apt-get update && apt-get install -y git-lfs
# WORKDIR /
# RUN git clone https://github.com/mozilla/DeepSpeech.git

# Install the Python dependencies listed in the repo's requirements.txt
WORKDIR /DeepSpeech
RUN pip3 install --no-cache-dir --requirement requirements.txt

# Symlink DeepSpeech's native_client directory into the TensorFlow checkout
RUN ln --symbolic /DeepSpeech/native_client /tensorflow
# >> START Build and bind

WORKDIR /tensorflow

# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure

# Build DeepSpeech (libdeepspeech.so and generate_trie).
#   * CPU optimizations are passed as --copt flags:
#     -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx
#   * --config=cuda builds using CUDA.
#   * LD_LIBRARY_PATH is passed via --action_env because Bazel doesn't pick it
#     up from the environment.
RUN bazel build \
        --config=monolithic \
        --config=cuda \
        -c opt \
        --copt=-O3 \
        --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        --copt=-mtune=generic \
        --copt=-march=x86-64 \
        --copt=-msse \
        --copt=-msse2 \
        --copt=-msse3 \
        --copt=-msse4.1 \
        --copt=-msse4.2 \
        --copt=-mavx \
        --copt=-fvisibility=hidden \
        //native_client:libdeepspeech.so \
        //native_client:generate_trie \
        --verbose_failures \
        --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
###
### Using TensorFlow upstream should work
###
# # Build TF pip package
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
#
# # Build wheel
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#
# # Install tensorflow from our custom wheel
# RUN pip3 install /tmp/tensorflow_pkg/*.whl
# Copy built libs to /DeepSpeech/native_client
RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \
    && cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/

# Install TensorFlow.
# All pip installs in this section use --no-cache-dir (as the requirements.txt
# install does) so downloaded wheels are not kept in the image layers.
WORKDIR /DeepSpeech/
RUN pip3 install --no-cache-dir tensorflow-gpu==1.13.1

# Make DeepSpeech and install Python bindings.
# TFDIR points the native_client build at the TensorFlow checkout.
ENV TFDIR=/tensorflow
WORKDIR /DeepSpeech/native_client
RUN make deepspeech

WORKDIR /DeepSpeech/native_client/python
RUN make bindings
RUN pip3 install --no-cache-dir dist/deepspeech*

# Build and install the ctcdecode wheel
WORKDIR /DeepSpeech/native_client/ctcdecode
RUN make
RUN pip3 install --no-cache-dir dist/*.whl

# << END Build and bind
# Allow Python printing utf-8
ENV PYTHONIOENCODING=UTF-8

# Build KenLM in /DeepSpeech/native_client/kenlm folder.
# Any kenlm directory that came in with the build context is replaced by a
# fresh shallow clone.
# NOTE(review): the clone tracks the default branch head and is not pinned to a
# tag or commit, so rebuilds may pick up a different KenLM revision.
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm \
    && git clone --depth 1 https://github.com/kpu/kenlm

# WORKDIR creates the build directory if it is missing, replacing the
# `cd kenlm && mkdir -p build && cd build` chain.
WORKDIR /DeepSpeech/native_client/kenlm/build
RUN cmake .. \
    && make -j 4

# Done
WORKDIR /DeepSpeech