Skip to content

TF 2.16 with Torch 2.4.0 #1415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 10 additions & 24 deletions Dockerfile.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ ARG GPU_BASE_IMAGE_NAME
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

Expand Down Expand Up @@ -38,16 +37,15 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
{{ end }}

# Keep these variables in sync if base image is updated.
ENV TENSORFLOW_VERSION=2.15.0
ENV TENSORFLOW_VERSION=2.16.1
# See https://github.com/tensorflow/io#tensorflow-version-compatibility
ENV TENSORFLOW_IO_VERSION=0.35.0
ENV TENSORFLOW_IO_VERSION=0.37.0

# We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

Expand All @@ -62,7 +60,6 @@ ENV KMP_SETTINGS=false
ENV PIP_ROOT_USER_ACTION=ignore

ADD clean-layer.sh /tmp/clean-layer.sh
ADD patches/keras_patch.sh /tmp/keras_patch.sh
ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
ADD patches/template_conf.json /opt/kaggle/conf.json

Expand Down Expand Up @@ -122,21 +119,20 @@ RUN pip install spacy && \
{{ end}}

# Install PyTorch
# b/356397043: magma-cuda121 is the latest version
{{ if eq .Accelerator "gpu" }}
COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
RUN mamba install -y -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
RUN mamba install -y -c pytorch magma-cuda121 && \
pip install /tmp/torch/*.whl && \
# b/255757999 openmp (libomp.so) is a dependency of libtorchtext and libtorchaudio, so install it explicitly.
mamba install -y openmp && \
sudo apt -y install libsox-dev && \
rm -rf /tmp/torch && \
/tmp/clean-layer.sh
{{ else }}
RUN pip install \
torch==$TORCH_VERSION+cpu \
torchvision==$TORCHVISION_VERSION+cpu \
torchaudio==$TORCHAUDIO_VERSION+cpu \
torchtext==$TORCHTEXT_VERSION \
-f https://download.pytorch.org/whl/torch_stable.html && \
--index-url https://download.pytorch.org/whl/cpu && \
/tmp/clean-layer.sh
{{ end }}

Expand Down Expand Up @@ -199,32 +195,22 @@ RUN apt-get update && \

RUN pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o && /tmp/clean-layer.sh

# b/318672158 Use simply tensorflow-probability once > 0.23.0 is released.
RUN pip install \
"tensorflow==${TENSORFLOW_VERSION}" \
"tensorflow-io==${TENSORFLOW_IO_VERSION}" \
git+https://github.com/tensorflow/probability.git@fbc5ebe9b1d343113fb917010096cfd88b32eecf \
tensorflow_text \
tensorflow-probability \
tensorflow_decision_forests \
tensorflow-text \
"tensorflow_hub>=0.16.0" \
# b/331799280 Remove once other packages move over to dm-tree.
optree \
tf-keras && \
/tmp/clean-layer.sh

# b/318672158 Use simply tensorflow_decision_forests on next release, expected with tf 2.16
RUN pip install tensorflow_decision_forests==1.8.1 --no-deps && \
/tmp/clean-layer.sh

RUN chmod +x /tmp/keras_patch.sh && \
/tmp/keras_patch.sh

ADD patches/keras_internal.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal.py
ADD patches/keras_internal_test.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal_test.py

# Remove "--no-deps" flag and "namex" package once Keras 3.* is included in our base image.
# We ignore dependencies since tf2.15 and Keras 3.* should work despite pip saying it won't.
# Currently, keras tries to install a nightly version of tf 2.16: https://github.com/keras-team/keras/blob/fe2f54aa5bc42fb23a96449cf90434ab9bb6a2cd/requirements.txt#L2
RUN pip install --no-deps "keras>3" keras-cv keras-nlp namex && \
RUN pip install "keras>3" keras-cv keras-nlp && \
/tmp/clean-layer.sh

# b/328788268 libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
Expand Down
1 change: 0 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ pipeline {
--package torch \
--version $TORCH_VERSION \
--build-arg TORCHAUDIO_VERSION=$TORCHAUDIO_VERSION \
--build-arg TORCHTEXT_VERSION=$TORCHTEXT_VERSION \
--build-arg TORCHVISION_VERSION=$TORCHVISION_VERSION \
--build-arg CUDA_MAJOR_VERSION=$CUDA_MAJOR_VERSION \
--build-arg CUDA_MINOR_VERSION=$CUDA_MINOR_VERSION \
Expand Down
15 changes: 7 additions & 8 deletions config.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release
BASE_IMAGE_TAG=m114
CPU_BASE_IMAGE_NAME=tf2-cpu.2-15.py310
GPU_BASE_IMAGE_NAME=tf2-gpu.2-15.py310
BASE_IMAGE_TAG=m122
CPU_BASE_IMAGE_NAME=tf2-cpu.2-16.py310
GPU_BASE_IMAGE_NAME=tf2-gpu.2-16.py310
LIGHTGBM_VERSION=4.2.0
TORCH_VERSION=2.1.2
TORCHAUDIO_VERSION=2.1.2
TORCHTEXT_VERSION=0.16.2
TORCHVISION_VERSION=0.16.2
TORCH_VERSION=2.4.0
TORCHAUDIO_VERSION=2.4.0
TORCHVISION_VERSION=0.19.0
JAX_VERSION=0.4.26
CUDA_MAJOR_VERSION=12
CUDA_MINOR_VERSION=1
CUDA_MINOR_VERSION=3
6 changes: 4 additions & 2 deletions packages/jaxlib.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"

# Instructions: https://jax.readthedocs.io/en/latest/developer.html#building-jaxlib-from-source
RUN apt-get update && \
apt-get install -y g++ python python3-dev
RUN sudo ln -s /usr/bin/python3 /usr/bin/python

RUN apt-get update && \
apt-get install -y g++ python3 python3-dev

RUN pip install numpy wheel build

Expand Down
16 changes: 1 addition & 15 deletions packages/torch.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ FROM ${BASE_IMAGE} AS builder

ARG PACKAGE_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
Expand All @@ -20,7 +19,7 @@ RUN conda install -c conda-forge mamba

# Build instructions: https://github.com/pytorch/pytorch#from-source
RUN mamba install astunparse numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses
RUN mamba install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}
RUN mamba install -c pytorch magma-cuda121

# By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash.
# This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000
Expand Down Expand Up @@ -63,18 +62,6 @@ RUN sudo apt-get update && \
RUN sed -i 's/set(envs/set(envs\n "LIBS=-ltinfo"/' /usr/local/src/audio/third_party/sox/CMakeLists.txt
RUN cd /usr/local/src/audio && python setup.py bdist_wheel

# Build torchtext
# Instructions: https://github.com/pytorch/text#building-from-source
# See comment above for PYTORCH_BUILD_VERSION.
ENV BUILD_VERSION=$TORCHTEXT_VERSION
RUN cd /usr/local/src && \
git clone https://github.com/pytorch/text && \
cd text && \
git checkout tags/v$TORCHTEXT_VERSION && \
git submodule sync && \
git submodule update --init --recursive --jobs 1 && \
python setup.py bdist_wheel

# Build torchvision.
# Instructions: https://github.com/pytorch/vision/tree/main#installation
# See comment above for PYTORCH_BUILD_VERSION.
Expand All @@ -93,7 +80,6 @@ FROM alpine:latest
RUN mkdir -p /tmp/whl/
COPY --from=builder /usr/local/src/pytorch/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/audio/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/text/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/vision/dist/*.whl /tmp/whl

# Print out the built .whl file.
Expand Down
41 changes: 0 additions & 41 deletions patches/keras_patch.sh

This file was deleted.

17 changes: 6 additions & 11 deletions tests/test_geopandas.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
import unittest

import geopandas
from shapely.geometry import Polygon

class TestGeopandas(unittest.TestCase):
def test_read(self):
df = geopandas.read_file(geopandas.datasets.get_path('nybb'))
self.assertTrue(df.size > 1)

def test_spatial_join(self):
cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
countries = world[['geometry', 'name']]
countries = countries.rename(columns={'name':'country'})
cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
self.assertTrue(cities_with_country.size > 1)
def test_GeoSeries(self):
p1 = Polygon([(0, 0), (1, 0), (1, 1)])
p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])
g = geopandas.GeoSeries([p1, p2, p3])
12 changes: 0 additions & 12 deletions tests/test_torchtext.py

This file was deleted.

3 changes: 1 addition & 2 deletions tpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ ARG TENSORFLOW_VERSION
ARG TF_LIBTPU_VERSION
ARG JAX_VERSION
ARG TORCHVISION_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHAUDIO_VERSION

ENV ISTPUVM=1
Expand Down Expand Up @@ -60,7 +59,7 @@ RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
# Additional useful packages should be added here

RUN pip install tensorflow_hub https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/tensorflow/tf-${TENSORFLOW_VERSION}/tensorflow-${TENSORFLOW_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TF_LINUX_WHEEL_VERSION}.whl tensorflow-probability tensorflow-io \
torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchtext==${TORCHTEXT_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
jax[tpu]==${JAX_VERSION} -f https://storage.googleapis.com/jax-releases/libtpu_releases.html trax flax optax git+https://github.com/deepmind/dm-haiku jraph distrax \
papermill jupyterlab python-lsp-server[all] "jupyter-lsp==1.5.1" \
pandas matplotlib opencv-python-headless librosa accelerate diffusers scikit-learn transformers \
Expand Down
2 changes: 0 additions & 2 deletions tpu/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ JAX_VERSION=0.4.23
TORCH_VERSION=2.4.0
# https://github.com/pytorch/audio supports nightly
TORCHAUDIO_VERSION=2.4.0
# https://github.com/pytorch/text supports main
TORCHTEXT_VERSION=0.18.0
# https://github.com/pytorch/vision supports nightly
TORCHVISION_VERSION=0.19.0
TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64