michaelfeil · michaelfeil · Mar 16, 2025 · Mar 16, 2025 · Mar 16, 2025 · Mar 16, 2025
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -27,7 +27,7 @@ jobs:
       run:
         working-directory: ${{ inputs.working-directory }}
     strategy:
-      # fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest] # macos-latest
         python-version:

diff --git a/libs/client_infinity/infinity_client/poetry.lock b/libs/client_infinity/infinity_client/poetry.lock
diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
@@ -3,38 +3,42 @@
 # 1. Guide: pip install jinja2 jinja2-cli
 nvidia:
   # 2 .command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto
-  base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04'
-  main_install: "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
+  base_image: "nvidia/cuda:12.4.1-base-ubuntu22.04"
+  main_install: 'RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all'
+  python_version: python3.10
+  extra_installs_main: |
+    # nvcc is not installed -> the following might break if the torch version or python version changes.
+    RUN poetry run $PYTHON -m pip install --no-cache-dir https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 cpu:
   # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto
-  base_image: 'ubuntu:22.04' 
+  base_image: "ubuntu:22.04"
   # pyproject_sed: |
-  #   RUN sed -i 's|torch = "2.4.1"|torch = "2.5.0"|' pyproject.toml 
+  #   RUN sed -i 's|torch = "2.4.1"|torch = "2.5.0"|' pyproject.toml
   #   RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml
   #   RUN poetry lock --no-update
   poetry_extras: "all"
   main_install: |
     # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
     COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
     RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
-    RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino
+    RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
   extra_env_variables: |
     # Sets default to onnx
     ENV INFINITY_ENGINE="optimum"
 
 amd:
   # 2 . command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
-  base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0'
+  base_image: "rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0"
   # pyproject_sed: |
-  #   RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml 
-  #   RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml 
-  #   RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml 
+  #   RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml
+  #   RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml
+  #   RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml
   #   RUN poetry lock --no-update
   main_install: |
     # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
     COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
     RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/rocm6.2"
-  extra_installs_main: | 
+  extra_installs_main: |
     ARG GPU_ARCH
     ENV GPU_ARCH=${GPU_ARCH}
     # GPU architecture specific installations
@@ -77,9 +81,9 @@ amd:
     ENV INFINITY_BETTERTRANSFORMER="0"
 
 trt:
-  base_image: nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+  base_image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
   poetry_extras: "all onnxruntime-gpu"
-  extra_installs_main: | 
+  extra_installs_main: |
     # Install utils for tensorrt
     RUN apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip
     RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation
@@ -89,4 +93,4 @@ trt:
     ENV LD_LIBRARY_PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/${PYTHON}/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
     ENV PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt/bin:${PATH}
   python_version: python3.10
-  main_install: "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
+  main_install: 'RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all'
diff --git a/libs/infinity_emb/Dockerfile.amd_auto b/libs/infinity_emb/Dockerfile.amd_auto
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     # extras
     EXTRAS="all" \
-    PYTHON="python3.10"
-RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+    PYTHON="python3"
+    # "python3.10"
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON-pip $PYTHON curl
 # RUN conda init --reverse --all
 # RUN rm -rf /opt/conda && rm -rf /var/lib/jenkins
 # Bettertransformer is not supported on AMD

diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     # extras
     EXTRAS="all" \
-    PYTHON="python3.11"
-RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+    PYTHON="python3"
+    # "python3"
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON-pip $PYTHON curl
 # Sets default to onnx
 ENV INFINITY_ENGINE="optimum"
 
@@ -43,14 +44,14 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino
+RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
 
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 # "RUN poetry install --no-interaction --no-ansi  --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh  --without lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino
+RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
 
 #
 # TODO: remove this line
@@ -61,7 +62,7 @@ FROM builder AS testing
 # "RUN poetry install --no-interaction --no-ansi  --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh  --with lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino
+RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
 
 # lint 
 RUN poetry run ruff check .

diff --git a/libs/infinity_emb/Dockerfile.jinja2 b/libs/infinity_emb/Dockerfile.jinja2
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     # extras
     EXTRAS="{{poetry_extras | default('all')}}" \
-    PYTHON="{{python_version | default('python3.11')}}"
-RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+    PYTHON="{{python_version | default('python3')}}"
+# pip is installed as python3-pip: a versioned PYTHON (e.g. python3.10) would expand $PYTHON-pip to a package apt does not ship on ubuntu22.04 (TODO confirm for other base images).
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv python3-pip $PYTHON curl
 {{extra_env_variables | default('')}}
 WORKDIR /app
 

diff --git a/libs/infinity_emb/Dockerfile.nvidia_auto b/libs/infinity_emb/Dockerfile.nvidia_auto
@@ -2,7 +2,7 @@
 # This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
 # Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd
 
-FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base
+FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS base
 
 ENV PYTHONUNBUFFERED=1 \
     # pip
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     # extras
     EXTRAS="all" \
-    PYTHON="python3.11"
-RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+    PYTHON="python3"
+    # "python3.10"
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON-pip $PYTHON curl
 
 WORKDIR /app
 
@@ -42,7 +43,9 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
-#
+# nvcc is not installed -> the following might break if the torch version or python version changes.
+RUN poetry run $PYTHON -m pip install --no-cache-dir https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+
 # TODO: remove this line
 RUN apt-get install --no-install-recommends -y git && poetry run python -m pip install git+https://github.com/huggingface/transformers.git@7547f55e5d93245c0a013b50df976924f2d9e8b0 && rm -rf ~/.cache/ /tmp/*
 

diff --git a/libs/infinity_emb/Dockerfile.trt_onnx_auto b/libs/infinity_emb/Dockerfile.trt_onnx_auto
@@ -2,7 +2,7 @@
 # This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
 # Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd
 
-FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04 AS base
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS base
 
 ENV PYTHONUNBUFFERED=1 \
     # pip
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     # extras
     EXTRAS="all onnxruntime-gpu" \
-    PYTHON="python3.10"
-RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
+    PYTHON="python3.10"
+# PYTHON must be the versioned interpreter name: the tensorrt paths below expand to /app/.venv/lib/${PYTHON}/site-packages, and a venv only creates lib/pythonX.Y.
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv python3-pip $PYTHON curl
 # Set default to tensorrt
 ENV LD_LIBRARY_PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/${PYTHON}/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
 ENV PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt/bin:${PATH}