diff --git a/.rat-excludes b/.rat-excludes index 9822c9ac4154f..6464d827d5626 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -132,6 +132,8 @@ scripts/* images/* dev/* chart/*.iml +out/* +airflow-build-dockerfile* # Sha files .*sha256 diff --git a/3rd-party-licenses/LICENSE-reproducible.txt b/3rd-party-licenses/LICENSE-reproducible.txt new file mode 100644 index 0000000000000..65310b4f857a1 --- /dev/null +++ b/3rd-party-licenses/LICENSE-reproducible.txt @@ -0,0 +1,8 @@ +# Copyright 2013 The Servo Project Developers. +# Copyright 2017 zerolib Developers. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. diff --git a/BREEZE.rst b/BREEZE.rst index 61b42c510f1e9..e5a12a8ac9454 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -1981,6 +1981,30 @@ default is to build ``both`` type of packages ``sdist`` and ``wheel``. :alt: Breeze release-management prepare-airflow-package +Preparing airflow tarball +""""""""""""""""""""""""" + +You can prepare airflow source tarball using Breeze: + +.. code-block:: bash + + breeze release-management prepare-airflow-tarball + +This prepares airflow -source.tar.gz package in the dist folder. + +You must specify ``--version`` flag which is a pre-release version of Airflow you are preparing the +tarball for. + +.. code-block:: bash + + breeze release-management prepare-airflow-tarball --version 2.8.0rc1 + +.. 
image:: ./images/breeze/output_release-management_prepare-airflow-tarball.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/images/breeze/output_release-management_prepare-airflow-tarball.svg + :width: 100% + :alt: Breeze release-management prepare-airflow-tarball + + Start minor branch of Airflow """"""""""""""""""""""""""""" diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 6b51c9189f7cc..6c98f9beb1ea0 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -585,16 +585,23 @@ Airflow supports reproducible builds, which means that the packages prepared fro produce binary identical packages in reproducible way. You should check if the packages can be binary-reproduced when built from the sources. -Checkout airflow sources and build packages in dist folder: +Checkout airflow sources and build packages in dist folder (replace X.Y.Zrc1 with the version +you are checking): ```shell script -git checkout X.Y.Zrc1 +VERSION=X.Y.Zrc1 +git checkout ${VERSION} export AIRFLOW_REPO_ROOT=$(pwd) rm -rf dist/* -breeze release-management prepare-airflow-package --package-format both +breeze release-management prepare-airflow-tarball --version ${VERSION} +breeze release-management prepare-airflow-package --package-format both --use-local-hatch ``` -That should produce `.whl` and `.tar.gz` packages in dist folder. +Note that you need to have `hatch` installed in order to build the packages with the last command. +If you do not have `hatch`, you can remove the `--use-local-hatch` flag and it will build and use +docker image that has `hatch` and other necessary tools installed. + +That should produce `-source.tar.gz` tarball of sources and `.whl`, `.tar.gz` packages in dist folder. 
Change to the directory where you have the packages from svn: diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py index 09cc67c226370..11cba8d9f5a55 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py +++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py @@ -17,6 +17,7 @@ from __future__ import annotations import os +import shutil import click @@ -25,6 +26,7 @@ from airflow_breeze.utils.confirm import confirm_action from airflow_breeze.utils.console import console_print from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from airflow_breeze.utils.reproducible import archive_deterministically, get_source_date_epoch from airflow_breeze.utils.run_utils import run_command CI = os.environ.get("CI") @@ -59,21 +61,30 @@ def merge_pr(version_branch): def git_tag(version): if confirm_action(f"Tag {version}?"): run_command(["git", "tag", "-s", f"{version}", "-m", f"Apache Airflow {version}"], check=True) - console_print("Tagged") + console_print("[success]Tagged") def git_clean(): if confirm_action("Clean git repo?"): run_command(["breeze", "ci", "fix-ownership"], dry_run_override=DRY_RUN, check=True) run_command(["git", "clean", "-fxd"], dry_run_override=DRY_RUN, check=True) - console_print("Git repo cleaned") + console_print("[success]Git repo cleaned") -def tarball_release(version, version_without_rc): - if confirm_action("Create tarball?"): - run_command(["rm", "-rf", "dist"], check=True) +DIST_DIR = AIRFLOW_SOURCES_ROOT / "dist" +OUT_DIR = AIRFLOW_SOURCES_ROOT / "out" +REPRODUCIBLE_DIR = OUT_DIR / "reproducible" + - run_command(["mkdir", "dist"], check=True) +def tarball_release(version: str, version_without_rc: str, source_date_epoch: int): + if confirm_action("Create tarball?"): + console_print(f"[info]Creating tarball for Airflow {version}") + shutil.rmtree(OUT_DIR, ignore_errors=True) + 
DIST_DIR.mkdir(exist_ok=True) + OUT_DIR.mkdir(exist_ok=True) + REPRODUCIBLE_DIR.mkdir(exist_ok=True) + archive_name = f"apache-airflow-{version_without_rc}-source.tar.gz" + temporary_archive = OUT_DIR / archive_name run_command( [ "git", @@ -82,19 +93,48 @@ def tarball_release(version, version_without_rc): f"{version}", f"--prefix=apache-airflow-{version_without_rc}/", "-o", - f"dist/apache-airflow-{version_without_rc}-source.tar.gz", + temporary_archive.as_posix(), ], check=True, ) - console_print("Tarball created") + run_command( + [ + "tar", + "-xf", + temporary_archive.as_posix(), + "-C", + REPRODUCIBLE_DIR.as_posix(), + "--strip", + "1", + ] + ) + final_archive = DIST_DIR / archive_name + archive_deterministically( + dir_to_archive=REPRODUCIBLE_DIR.as_posix(), + dest_archive=final_archive.as_posix(), + prepend_path=None, + timestamp=source_date_epoch, + ) + console_print(f"[success]Tarball created in {final_archive}") -def create_artifacts_with_sdist(): - run_command(["hatch", "build", "-t", "sdist", "-t", "wheel"], check=True) - console_print("Artifacts created") +def create_artifacts_with_hatch(source_date_epoch: int): + console_print("[info]Creating artifacts with hatch") + shutil.rmtree(DIST_DIR, ignore_errors=True) + DIST_DIR.mkdir(exist_ok=True) + env_copy = os.environ.copy() + env_copy["SOURCE_DATE_EPOCH"] = str(source_date_epoch) + run_command( + ["hatch", "build", "-c", "-t", "custom", "-t", "sdist", "-t", "wheel"], check=True, env=env_copy + ) + console_print("[success]Successfully prepared Airflow packages:") + for file in sorted(DIST_DIR.glob("apache_airflow*")): + console_print(file.name) + console_print() -def create_artifacts_with_breeze(): +def create_artifacts_with_docker(): + console_print("[info]Creating artifacts with docker") run_command( [ "breeze", @@ -105,14 +145,14 @@ def create_artifacts_with_breeze(): ], check=True, ) - console_print("Artifacts created") + console_print("[success]Artifacts created") def 
sign_the_release(repo_root): if confirm_action("Do you want to sign the release?"): os.chdir(f"{repo_root}/dist") run_command("./../dev/sign.sh *", dry_run_override=DRY_RUN, check=True, shell=True) - console_print("Release signed") + console_print("[success]Release signed") def tag_and_push_constraints(version, version_branch): @@ -135,7 +175,7 @@ def tag_and_push_constraints(version, version_branch): run_command( ["git", "push", "origin", "tag", f"constraints-{version}"], dry_run_override=DRY_RUN, check=True ) - console_print("Constraints tagged and pushed") + console_print("[success]Constraints tagged and pushed") def clone_asf_repo(version, repo_root): @@ -146,7 +186,7 @@ def clone_asf_repo(version, repo_root): check=True, ) run_command(["svn", "update", "--set-depth=infinity", "asf-dist/dev/airflow"], check=True) - console_print("Cloned ASF repo successfully") + console_print("[success]Cloned ASF repo successfully") def move_artifacts_to_svn(version, repo_root): @@ -154,7 +194,7 @@ def move_artifacts_to_svn(version, repo_root): os.chdir(f"{repo_root}/asf-dist/dev/airflow") run_command(["svn", "mkdir", f"{version}"], dry_run_override=DRY_RUN, check=True) run_command(f"mv {repo_root}/dist/* {version}/", dry_run_override=DRY_RUN, check=True, shell=True) - console_print("Moved artifacts to SVN:") + console_print("[success]Moved artifacts to SVN:") run_command(["ls"], dry_run_override=DRY_RUN) @@ -171,7 +211,7 @@ def push_artifacts_to_asf_repo(version, repo_root): dry_run_override=DRY_RUN, check=True, ) - console_print("Files pushed to svn") + console_print("[success]Files pushed to svn") def delete_asf_repo(repo_root): @@ -182,7 +222,7 @@ def delete_asf_repo(repo_root): def prepare_pypi_packages(version, version_suffix, repo_root): if confirm_action("Prepare pypi packages?"): - console_print("Preparing PyPI packages") + console_print("[info]Preparing PyPI packages") os.chdir(repo_root) run_command(["git", "checkout", f"{version}"], dry_run_override=DRY_RUN, 
check=True) run_command( @@ -198,13 +238,13 @@ def prepare_pypi_packages(version, version_suffix, repo_root): check=True, ) run_command(["twine", "check", "dist/*"], check=True) - console_print("PyPI packages prepared") + console_print("[success]PyPI packages prepared") def push_packages_to_pypi(version): if confirm_action("Do you want to push packages to production PyPI?"): run_command(["twine", "upload", "-r", "pypi", "dist/*"], dry_run_override=DRY_RUN, check=True) - console_print("Packages pushed to production PyPI") + console_print("[success]Packages pushed to production PyPI") console_print( "Again, confirm that the package is available here: https://pypi.python.org/pypi/apache-airflow" ) @@ -240,7 +280,7 @@ def push_release_candidate_tag_to_github(version): ) confirm_action(f"Confirm that {version} is pushed to PyPI(not PyPI test). Is it pushed?", abort=True) run_command(["git", "push", "origin", "tag", f"{version}"], dry_run_override=DRY_RUN, check=True) - console_print("Release candidate tag pushed to GitHub") + console_print("[success]Release candidate tag pushed to GitHub") def create_issue_for_testing(version, previous_version, github_token): @@ -293,10 +333,31 @@ def remove_old_releases(version, repo_root): dry_run_override=DRY_RUN, check=True, ) - + console_print("[success]Old releases removed") os.chdir(repo_root) +@release_management.command( + name="prepare-airflow-tarball", + help="Prepare airflow's source tarball.", +) +@click.option( + "--version", required=True, help="The release candidate version e.g. 
2.4.3rc1", envvar="VERSION" +) +def prepare_airflow_tarball(version: str): + from packaging.version import Version + + airflow_version = Version(version) + if not airflow_version.is_prerelease: + exit("--version value must be a pre-release") + source_date_epoch = get_source_date_epoch() + version_without_rc = airflow_version.base_version + # Create the tarball + tarball_release( + version=version, version_without_rc=version_without_rc, source_date_epoch=source_date_epoch + ) + + @release_management.command( name="start-rc-process", short_help="Start RC process", @@ -311,7 +372,8 @@ def remove_old_releases(version, repo_root): def publish_release_candidate(version, previous_version, github_token): from packaging.version import Version - if not Version(version).is_prerelease: + airflow_version = Version(version) + if not airflow_version.is_prerelease: exit("--version value must be a pre-release") if Version(previous_version).is_prerelease: exit("--previous-version value must be a release not a pre-release") @@ -320,9 +382,10 @@ def publish_release_candidate(version, previous_version, github_token): if not github_token: console_print("GITHUB_TOKEN is not set! Issue generation will fail.") confirm_action("Do you want to continue?", abort=True) - version_suffix = version[5:] - version_branch = version[:3].replace(".", "-") - version_without_rc = version[:5] + + version_suffix = airflow_version.pre[0] + str(airflow_version.pre[1]) + version_branch = str(airflow_version.release[0]) + "-" + str(airflow_version.release[1]) + version_without_rc = airflow_version.base_version os.chdir(AIRFLOW_SOURCES_ROOT) airflow_repo_root = os.getcwd() @@ -343,20 +406,21 @@ def publish_release_candidate(version, previous_version, github_token): confirm_action("Pushes will be made to origin. 
Do you want to continue?", abort=True) # Merge the sync PR merge_pr(version_branch) - - # Tag & clean the repo + # + # # Tag & clean the repo git_tag(version) git_clean() - # Build the latest image - if confirm_action("Build latest breeze image?"): - run_command(["breeze", "ci-image", "build", "--python", "3.8"], dry_run_override=DRY_RUN, check=True) + source_date_epoch = get_source_date_epoch() + shutil.rmtree(DIST_DIR, ignore_errors=True) # Create the tarball - tarball_release(version, version_without_rc) + tarball_release( + version=version, version_without_rc=version_without_rc, source_date_epoch=source_date_epoch + ) # Create the artifacts - if confirm_action("Use breeze to create artifacts?"): - create_artifacts_with_breeze() + if confirm_action("Use docker to create artifacts?"): + create_artifacts_with_docker() elif confirm_action("Use hatch to create artifacts?"): - create_artifacts_with_sdist() + create_artifacts_with_hatch() # Sign the release sign_the_release(airflow_repo_root) # Tag and push constraints diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 202ae1f682b08..5d132036e9bbc 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -142,6 +142,7 @@ get_related_providers, ) from airflow_breeze.utils.python_versions import get_python_version_list +from airflow_breeze.utils.reproducible import get_source_date_epoch from airflow_breeze.utils.run_utils import ( run_command, ) @@ -284,6 +285,11 @@ class VersionedFile(NamedTuple): name="prepare-airflow-package", help="Prepare sdist/whl package of Airflow.", ) +@click.option( + "--use-local-hatch", + is_flag=True, + help="Use local hatch instead of docker to build the package. 
You need to have hatch installed.", +) @option_package_format @option_version_suffix_for_pypi @option_verbose @@ -291,10 +297,30 @@ class VersionedFile(NamedTuple): def prepare_airflow_packages( package_format: str, version_suffix_for_pypi: str, + use_local_hatch: bool, ): perform_environment_checks() fix_ownership_using_docker() cleanup_python_generated_files() + source_date_epoch = get_source_date_epoch() + if use_local_hatch: + hatch_build_command = ["hatch", "build", "-c", "-t", "custom"] + if package_format in ["sdist", "both"]: + hatch_build_command.extend(["-t", "sdist"]) + if package_format in ["wheel", "both"]: + hatch_build_command.extend(["-t", "wheel"]) + env_copy = os.environ.copy() + env_copy["SOURCE_DATE_EPOCH"] = str(source_date_epoch) + run_command( + hatch_build_command, + check=True, + env=env_copy, + ) + get_console().print("[success]Successfully prepared Airflow packages:") + for file in sorted(DIST_DIR.glob("apache_airflow*")): + get_console().print(file.name) + get_console().print() + return # This is security feature. # # Building the image needed to build airflow package including .git directory @@ -352,7 +378,10 @@ def prepare_airflow_packages( # Copy all files in the dist directory in container to the host dist directory (note '/.' 
in SRC) run_command(["docker", "cp", f"{container_id}:/opt/airflow/dist/.", "./dist"], check=True) run_command(["docker", "rm", "--force", container_id], check=True) - get_console().print("[success]Successfully prepared Airflow package!\n\n") + get_console().print("[success]Successfully prepared Airflow packages:") + for file in sorted(DIST_DIR.glob("apache_airflow*")): + get_console().print(file.name) + get_console().print() def provider_action_summary(description: str, message_type: MessageType, packages: list[str]): diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index e6ddec7e161e6..b0b1bb96e0377 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -19,8 +19,9 @@ RELEASE_AIRFLOW_COMMANDS: dict[str, str | list[str]] = { "name": "Airflow release commands", "commands": [ - "prepare-airflow-package", "create-minor-branch", + "prepare-airflow-package", + "prepare-airflow-tarball", "start-rc-process", "start-release", "release-prod-images", @@ -43,9 +44,10 @@ RELEASE_OTHER_COMMANDS: dict[str, str | list[str]] = { "name": "Other release commands", "commands": [ + "add-back-references", "publish-docs", "generate-constraints", - "add-back-references", + "update-constraints", ], } @@ -55,10 +57,19 @@ "name": "Package flags", "options": [ "--package-format", + "--use-local-hatch", "--version-suffix-for-pypi", ], } ], + "breeze release-management prepare-airflow-tarball": [ + { + "name": "Package flags", + "options": [ + "--version", + ], + } + ], "breeze release-management verify-provider-packages": [ { "name": "Provider verification flags", diff --git a/dev/breeze/src/airflow_breeze/utils/reproducible.py b/dev/breeze/src/airflow_breeze/utils/reproducible.py new file mode 100644 index 0000000000000..a85d871a3cfaa --- /dev/null +++ 
b/dev/breeze/src/airflow_breeze/utils/reproducible.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Copyright 2013 The Servo Project Developers. +# Copyright 2017 zerolib Developers. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +# This command is a largely vendored-in script from +# https://github.com/MuxZeroNet/reproducible/blob/master/reproducible.py +from __future__ import annotations + +import contextlib +import gzip +import itertools +import locale +import os +import tarfile +from argparse import ArgumentParser + +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT + + +def get_source_date_epoch(): + import yaml + + reproducible_build_yaml = AIRFLOW_SOURCES_ROOT / "airflow" / "reproducible_build.yaml" + reproducible_build_dict = yaml.safe_load(reproducible_build_yaml.read_text()) + source_date_epoch: int = reproducible_build_dict["source-date-epoch"] + return source_date_epoch + + +@contextlib.contextmanager +def cd(new_path): + """Context manager for changing the current working directory""" + previous_path = os.getcwd() + try: + os.chdir(new_path) + yield + finally: + os.chdir(previous_path) + + +@contextlib.contextmanager +def setlocale(name): + """Context manager for changing the current locale""" + saved_locale = locale.setlocale(locale.LC_ALL) + try: + yield locale.setlocale(locale.LC_ALL, name) + finally: + locale.setlocale(locale.LC_ALL, saved_locale) + + +def archive_deterministically(dir_to_archive, dest_archive, prepend_path=None, timestamp=0): + """Create a .tar.gz archive in a deterministic (reproducible) manner. + + See https://reproducible-builds.org/docs/archives/ for more details.""" + + def reset(tarinfo): + """Helper to reset owner/group and modification time for tar entries""" + tarinfo.uid = tarinfo.gid = 0 + tarinfo.uname = tarinfo.gname = "root" + tarinfo.mtime = timestamp + return tarinfo + + dest_archive = os.path.abspath(dest_archive) + with cd(dir_to_archive): + current_dir = "." 
+ file_list = [current_dir] + for root, dirs, files in os.walk(current_dir): + for name in itertools.chain(dirs, files): + file_list.append(os.path.join(root, name)) + + # Sort file entries with the fixed locale + with setlocale("C"): + file_list.sort(key=locale.strxfrm) + + # Use a temporary file and atomic rename to avoid partially-formed + # packaging (in case of exceptional situations like running out of disk space). + temp_file = f"{dest_archive}.temp~" + with os.fdopen(os.open(temp_file, os.O_WRONLY | os.O_CREAT, 0o644), "wb") as out_file: + with gzip.GzipFile("wb", fileobj=out_file, mtime=0) as gzip_file: + with tarfile.open(fileobj=gzip_file, mode="w:") as tar_file: + for entry in file_list: + arcname = entry + if prepend_path is not None: + arcname = os.path.normpath(os.path.join(prepend_path, arcname)) + tar_file.add(entry, filter=reset, recursive=False, arcname=arcname) + os.rename(temp_file, dest_archive) + + +def main(): + parser = ArgumentParser() + parser.add_argument("-d", "--dir", help="directory to archive") + parser.add_argument("-o", "--out", help="archive destination") + parser.add_argument("-p", "--prepend", help="prepend path") + parser.add_argument( + "-t", "--timestamp", help="timestamp of files", type=int, default=get_source_date_epoch() + ) + + args = parser.parse_args() + + if not args.dir or not args.out: + error = ( + "You should provide a directory to archive, and the " + f"archive file name, not {repr((args.dir, args.out))}" + ) + raise ValueError(error) + + archive_deterministically(args.dir, args.out, args.prepend, args.timestamp) + + +if __name__ == "__main__": + main() diff --git a/images/breeze/output_release-management.svg b/images/breeze/output_release-management.svg index f853a75b697eb..745601db244e2 100644 --- a/images/breeze/output_release-management.svg +++ b/images/breeze/output_release-management.svg @@ -1,4 +1,4 @@ - +