
Commit 5976f3c

Create process to prebuild tensorflow wheels
1 parent e10c4bc commit 5976f3c

File tree

5 files changed: +159 -0 lines changed


tensorflow-whl/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
1.11.0-py36: TensorFlow 1.11.0 wheels built with Python 3.6

tensorflow-whl/Dockerfile

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
FROM nvidia/cuda:9.1-cudnn7-devel-ubuntu16.04 AS nvidia
FROM continuumio/anaconda3:5.0.1

# Avoid interactive configuration prompts/dialogs during apt-get.
ENV DEBIAN_FRONTEND=noninteractive

# This is necessary for apt to access HTTPS sources.
RUN apt-get update && \
    apt-get install -y apt-transport-https

# CUDA support
COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/
COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/
COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg

ENV CUDA_VERSION=9.1.85
ENV CUDA_PKG_VERSION=9-1=$CUDA_VERSION-1
LABEL com.nvidia.volumes.needed="nvidia_driver"
LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
# The stub is useful for both build-time and run-time linking on CPU-only systems.
# When the image is used with actual GPUs, make sure to exclude the stubs (besides providing
# access to the host CUDA user libraries, either manually or through nvidia-docker). One
# convenient way to do so is to obscure the stub directory's contents with a bind mount:
#   docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ...
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=9.0"
RUN apt-get update && apt-get install -y --no-install-recommends \
        cuda-cudart-$CUDA_PKG_VERSION \
        cuda-libraries-$CUDA_PKG_VERSION \
        cuda-libraries-dev-$CUDA_PKG_VERSION \
        cuda-nvml-dev-$CUDA_PKG_VERSION \
        cuda-minimal-build-$CUDA_PKG_VERSION \
        cuda-command-line-tools-$CUDA_PKG_VERSION \
        libcudnn7=7.0.5.15-1+cuda9.1 \
        libcudnn7-dev=7.0.5.15-1+cuda9.1 \
        libnccl2=2.2.12-1+cuda9.1 \
        libnccl-dev=2.2.12-1+cuda9.1 && \
    ln -s /usr/local/cuda-9.1 /usr/local/cuda && \
    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
    rm -rf /var/lib/apt/lists/*

# Install bazel
RUN apt-get update && apt-get install -y python-software-properties zip && \
    echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \
    echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 C857C906 2B90D010 && \
    apt-get update && \
    # Accept the Oracle Java license non-interactively so the installer doesn't prompt.
    echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
    echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \
    apt-get install -y oracle-java8-installer && \
    echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
    apt-get update && apt-get install -y bazel && \
    apt-get upgrade -y bazel

# TensorFlow doesn't support Python 3.7 yet. See https://github.com/tensorflow/tensorflow/issues/20517
RUN conda install -y python=3.6.6 && \
    # Another fix for TF 1.10: https://github.com/tensorflow/tensorflow/issues/21518
    pip install keras_applications==1.0.4 --no-deps && \
    pip install keras_preprocessing==1.0.2 --no-deps

# Fetch TensorFlow
RUN cd /usr/local/src && \
    git clone https://github.com/tensorflow/tensorflow && \
    cd tensorflow && \
    git checkout r1.11

# Create a TensorFlow wheel for CPU
RUN cd /usr/local/src/tensorflow && \
    # Piping /dev/null into ./configure makes every prompt fall back to its default answer.
    cat /dev/null | ./configure && \
    bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package && \
    bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_cpu && \
    bazel clean

# Create a TensorFlow wheel for GPU/CUDA
ENV TF_NEED_CUDA=1
ENV TF_CUDA_VERSION=9.1
ENV TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.0
ENV TF_CUDNN_VERSION=7
ENV TF_NCCL_VERSION=2
ENV NCCL_INSTALL_PATH=/usr/

RUN cd /usr/local/src/tensorflow && \
    # TF_NCCL_INSTALL_PATH is used for both libnccl.so.2 and libnccl.h. Make sure they are both accessible from the same directory.
    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/ && \
    cat /dev/null | ./configure && \
    # Temporarily expose the CUDA stub libraries to the linker for the build, then remove them again afterwards.
    echo "/usr/local/cuda-${TF_CUDA_VERSION}/targets/x86_64-linux/lib/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \
    bazel build --config=opt \
        --config=cuda \
        --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        //tensorflow/tools/pip_package:build_pip_package && \
    rm /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \
    bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_gpu && \
    bazel clean

# Print out the built .whl files
RUN ls -R /tmp/tensorflow*
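The two build_pip_package invocations above leave the finished wheels inside the image at /tmp/tensorflow_cpu and /tmp/tensorflow_gpu. A minimal sketch of copying them out locally for inspection, assuming the image was tagged kaggle/python-tensorflow-whl by the build script below; the exact .whl filenames depend on the TensorFlow and Python versions, so they are not spelled out here.

```
# Hypothetical local check, not part of the committed scripts.
# The image tag comes from ./build; the /tmp paths come from the Dockerfile above.
CONTAINER_ID=$(docker create kaggle/python-tensorflow-whl)
docker cp "${CONTAINER_ID}:/tmp/tensorflow_cpu" ./tensorflow_cpu
docker cp "${CONTAINER_ID}:/tmp/tensorflow_gpu" ./tensorflow_gpu
docker rm "${CONTAINER_ID}"
ls tensorflow_cpu tensorflow_gpu
```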

tensorflow-whl/README.md

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
# Build new TensorFlow wheels

```
./build
```

# Push the new wheels

1. Add an entry in the [CHANGELOG](CHANGELOG.md) with an appropriate `LABEL`.
2. Push the new image using the `LABEL` you picked above.

```
./push LABEL
```

# Use the new wheels

Update the line below in the [CPU Dockerfile](../Dockerfile) and the [GPU Dockerfile](../gpu.Dockerfile) to use the new `LABEL`.

```
FROM gcr.io/kaggle-images/python-tensorflow-whl:<NEW-LABEL> as tensorflow_whl
```
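Before editing the downstream Dockerfiles, a pushed label can be sanity-checked directly. A minimal sketch, assuming the 1.11.0-py36 label from the CHANGELOG has already been pushed; it only lists the wheel directories baked into the image.

```
# Hypothetical smoke test of a pushed wheel image (label taken from CHANGELOG.md).
docker run --rm gcr.io/kaggle-images/python-tensorflow-whl:1.11.0-py36 \
    ls /tmp/tensorflow_cpu /tmp/tensorflow_gpu
```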

tensorflow-whl/build

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
#!/bin/bash
set -e
set -x

# Default behavior is to do everything from scratch.
# The --use-cache option is useful if you're iterating on a broken build.
if [[ "$1" == "--use-cache" ]]; then
    docker build --rm -t kaggle/python-tensorflow-whl .
else
    docker build --pull --rm --no-cache -t kaggle/python-tensorflow-whl .
fi
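For reference, the two ways the script above is expected to be invoked; the --use-cache form is the one described in its comment and simply lets Docker reuse already-built layers.

```
# Full, from-scratch build (default).
./build

# Reuse Docker's layer cache while iterating on a broken build step.
./build --use-cache
```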

tensorflow-whl/push

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
#!/bin/bash
#
# Push a newly-built image with the given label to gcr.io.
#
# Usage:
#   ./push LABEL
#
# Description:
#   LABEL: Image label. See CHANGELOG.md.
#
set -e
set -x

SOURCE_IMAGE="kaggle/python-tensorflow-whl"
TARGET_IMAGE="gcr.io/kaggle-images/python-tensorflow-whl"

LABEL=$1

if [[ -z "$LABEL" ]]; then
    echo "You must provide a label for the image."
    exit 1
fi

docker tag $SOURCE_IMAGE:latest $TARGET_IMAGE:$LABEL
gcloud docker -- push $TARGET_IMAGE:$LABEL
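A concrete invocation, using the label already recorded in CHANGELOG.md; this assumes the gcloud CLI is installed and authorized to push to the kaggle-images project.

```
# Example: tag and push the freshly built image under the CHANGELOG label.
./push 1.11.0-py36
```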
