From bb2b7b183b910ea4b2fa9552c78760752c9d03ae Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 11 Feb 2024 14:24:01 +0100 Subject: [PATCH] Optimize caching installed packages in CI build (#37315) Some of the recent changes in handling conflicting dependencies broke optimization of installing dependencies from branch tip. The optimisation worked in the way that it installed packages first from branch tip, to make them pre-installed (and cached in docker layer) so that final installation step with pyproject.toml takes very little time, even if it is changed. The problem was that in case branch tip and constraints conflicted, the installation failed and effectively no packages were installed in the "branch tip" layer, effectively removing the cache. This change fixes it - when we install from branch tip now we are not using constraints, which means that they will never conflict, and this also means that cache will never be empty. It can contain other versions of some of the packages, but vast majority of the packages should be the same as in constraints, so the following installation step should reuse vast majority of already installed packages. (cherry picked from commit 90a650d3718d76ec8ae07d9bc666cc278bcaf2d7) --- Dockerfile | 10 ++++++---- Dockerfile.ci | 10 ++++++---- .../install_airflow_dependencies_from_branch_tip.sh | 10 ++++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6beb0f823750..80233a5bcc31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -476,14 +476,16 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi - # Install latest set of dependencies using constraints. In case constraints were upgraded and there - # are conflicts, this might fail, but it should be fixed in the following installation steps + # Install latest set of dependencies - without constraints. 
This is to download a "base" set of + # dependencies that we can cache and reuse when installing airflow using constraints and latest + # pyproject.toml in the next step (when we install regular airflow). set -x pip install --root-user-action ignore \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" common::install_pip_version + # Uninstall airflow to keep only the dependencies. In the future when planned https://github.com/pypa/pip/issues/11440 + # is implemented in pip we might be able to use this flag and skip the remove step. pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true set +x echo diff --git a/Dockerfile.ci b/Dockerfile.ci index 781c7eee4ee4..bef8a6b564a0 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -436,14 +436,16 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi - # Install latest set of dependencies using constraints. In case constraints were upgraded and there - # are conflicts, this might fail, but it should be fixed in the following installation steps + # Install latest set of dependencies - without constraints. This is to download a "base" set of + # dependencies that we can cache and reuse when installing airflow using constraints and latest + # pyproject.toml in the next step (when we install regular airflow). 
set -x pip install --root-user-action ignore \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" common::install_pip_version + # Uninstall airflow to keep only the dependencies. In the future when planned https://github.com/pypa/pip/issues/11440 + # is implemented in pip we might be able to use this flag and skip the remove step. pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true set +x echo diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh index 9c809039c7df..fcb30505d95a 100644 --- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh +++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh @@ -46,14 +46,16 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi - # Install latest set of dependencies using constraints. In case constraints were upgraded and there - # are conflicts, this might fail, but it should be fixed in the following installation steps + # Install latest set of dependencies - without constraints. This is to download a "base" set of + # dependencies that we can cache and reuse when installing airflow using constraints and latest + # pyproject.toml in the next step (when we install regular airflow). 
set -x pip install --root-user-action ignore \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ - --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" || true + "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" common::install_pip_version + # Uninstall airflow to keep only the dependencies. In the future when planned https://github.com/pypa/pip/issues/11440 + # is implemented in pip we might be able to use this flag and skip the remove step. pip freeze | grep apache-airflow-providers | xargs pip uninstall --yes 2>/dev/null || true set +x echo