dev/breeze/src/airflow_breeze/commands/main_command.py (1 addition, 1 deletion)
@@ -44,12 +44,12 @@
     option_verbose,
 )
 from airflow_breeze.configure_rich_click import click
-from airflow_breeze.global_constants import generate_provider_dependencies_if_needed
 from airflow_breeze.utils.click_utils import BreezeGroup
 from airflow_breeze.utils.confirm import Answer, user_confirm
 from airflow_breeze.utils.console import get_console
 from airflow_breeze.utils.docker_command_utils import remove_docker_networks, remove_docker_volumes
 from airflow_breeze.utils.path_utils import AIRFLOW_HOME_PATH, BUILD_CACHE_PATH
+from airflow_breeze.utils.provider_dependencies import generate_provider_dependencies_if_needed
 from airflow_breeze.utils.run_utils import run_command
 from airflow_breeze.utils.shared_options import get_dry_run
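Note on the overall shape of this PR: generate_provider_dependencies_if_needed (and, in the files below, the provider-dependency mapping itself) moves out of global_constants, so importing global_constants no longer regenerates files as a side effect. The diff never shows the body of the new accessor, but the call sites imply a lazy, cached function replacing an import-time constant. A minimal sketch of the two shapes (file names and the @cache choice are illustrative assumptions, not code from this PR):

# eager.py - old shape: data is built while the module is imported
import json
from pathlib import Path

DATA = json.loads(Path("generated.json").read_text())  # runs as soon as anyone imports eager

# lazy.py - new shape: nothing happens until the first caller asks for the data
import json
from functools import cache
from pathlib import Path

@cache  # later calls reuse the parsed dict instead of re-reading the file
def get_data() -> dict:
    return json.loads(Path("generated.json").read_text())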
dev/breeze/src/airflow_breeze/commands/release_management_commands.py (file header not captured by the page; name inferred from the functions shown)
@@ -167,9 +167,9 @@
     cleanup_python_generated_files,
 )
 from airflow_breeze.utils.provider_dependencies import (
-    DEPENDENCIES,
     generate_providers_metadata_for_provider,
     get_all_constraint_files_and_airflow_releases,
+    get_provider_dependencies,
     get_related_providers,
     load_constraints,
 )
@@ -2523,7 +2523,7 @@ class ProviderPRInfo(NamedTuple):
         suffix: str

     if not provider_distributions:
-        provider_distributions = list(DEPENDENCIES.keys())
+        provider_distributions = list(get_provider_dependencies().keys())
     with ci_group("Generates GitHub issue content with people who can test it"):
         if excluded_pr_list:
             excluded_prs = [int(pr) for pr in excluded_pr_list.split(",")]
@@ -3003,7 +3003,7 @@ def generate_providers_metadata(
         airflow_release_dates=airflow_release_dates,
         current_metadata=current_metadata,
     )
-    package_ids = DEPENDENCIES.keys()
+    package_ids = get_provider_dependencies().keys()
     with Pool() as pool:
         results = pool.map(
             partial_generate_providers_metadata,
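The generate_providers_metadata hunk keeps its fan-out structure: a functools.partial pins the shared keyword arguments, and Pool.map distributes the per-provider ids across worker processes. A self-contained sketch of that pattern (the function and data are illustrative stand-ins, not the PR's code):

import functools
from multiprocessing import Pool

def build_metadata(provider_id: str, *, release_date: str) -> str:
    # Stand-in for generate_providers_metadata_for_provider.
    return f"{provider_id}@{release_date}"

if __name__ == "__main__":
    worker = functools.partial(build_metadata, release_date="2024-01-01")
    with Pool() as pool:
        # Each provider id is processed in a separate worker process.
        print(pool.map(worker, ["amazon", "google", "postgres"]))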
dev/breeze/src/airflow_breeze/commands/sbom_commands.py (2 additions, 2 deletions)
@@ -44,7 +44,6 @@
     AIRFLOW_PYTHON_COMPATIBILITY_MATRIX,
     ALL_HISTORICAL_PYTHON_VERSIONS,
     DEVEL_DEPS_PATH,
-    PROVIDER_DEPENDENCIES,
 )
 from airflow_breeze.utils.cdxgen import (
     CHECK_DOCS,
@@ -90,6 +89,7 @@
     read_metadata_from_google_spreadsheet,
     write_sbom_information_to_google_spreadsheet,
 )
+from airflow_breeze.utils.provider_dependencies import get_provider_dependencies
 from airflow_breeze.utils.recording import generating_command_images
 from airflow_breeze.utils.shared_options import get_dry_run, get_verbose
@@ -624,7 +624,7 @@ def build_all_airflow_images(
 @option_historical_python_versions
 @click.option(
     "--provider-id",
-    type=BetterChoice(list(PROVIDER_DEPENDENCIES.keys())),
+    type=BetterChoice(list(get_provider_dependencies().keys())),
     required=False,
     help="Provider id to generate the requirements for",
 )
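One subtlety in the --provider-id change: arguments to @click.option are evaluated when the module is imported, so list(get_provider_dependencies().keys()) still runs at import time here. The gain is that the loading logic now lives behind a single accessor rather than a module-level constant, so repeated option declarations can share one (presumably cached) load. A sketch of the mechanism using plain click.Choice (BetterChoice is a Breeze-internal subclass; the provider list is made up):

import click

def get_provider_ids() -> list[str]:
    # Stand-in for get_provider_dependencies().keys().
    return ["amazon", "google", "postgres"]

@click.command()
@click.option("--provider-id", type=click.Choice(get_provider_ids()), required=False)
def generate_requirements(provider_id):
    click.echo(f"Generating requirements for: {provider_id or 'all providers'}")

if __name__ == "__main__":
    generate_requirements()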
dev/breeze/src/airflow_breeze/global_constants.py (1 addition, 94 deletions)
@@ -20,20 +20,15 @@
 
 from __future__ import annotations
 
-import json
 import platform
-import subprocess
-from collections.abc import Generator
 from enum import Enum
 from pathlib import Path
-from threading import Lock
 
 from airflow_breeze.utils.functools_cache import clearable_cache
 from airflow_breeze.utils.host_info_utils import Architecture
 from airflow_breeze.utils.path_utils import (
     AIRFLOW_CORE_SOURCES_PATH,
     AIRFLOW_CTL_SOURCES_PATH,
-    AIRFLOW_PYPROJECT_TOML_FILE_PATH,
     AIRFLOW_ROOT_PATH,
     AIRFLOW_TASK_SDK_SOURCES_PATH,
 )
@@ -653,103 +648,15 @@ def get_airflow_extras():
 
 # Initialize integrations
 PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_CORE_SOURCES_PATH / "airflow" / "provider_info.schema.json"
-AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH = AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json"
-AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
-    AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json.sha256sum"
-)
 
-ALL_PYPROJECT_TOML_FILES = []
-
-
-def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, None, None]:
-    pyproject_toml_content = AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
-    in_workspace = False
-    for line in pyproject_toml_content:
-        trimmed_line = line.strip()
-        if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
-            in_workspace = True
-        elif in_workspace:
-            if trimmed_line.startswith("#"):
-                continue
-            if trimmed_line.startswith('"'):
-                path = trimmed_line.split('"')[1]
-                ALL_PYPROJECT_TOML_FILES.append(AIRFLOW_ROOT_PATH / path / "pyproject.toml")
-                if trimmed_line.startswith('"providers/'):
-                    yield AIRFLOW_ROOT_PATH / path / "pyproject.toml"
-                    yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
-            elif trimmed_line.startswith("]"):
-                break
-
+ALL_PYPROJECT_TOML_FILES: list[Path] = []
 
-_regenerate_provider_deps_lock = Lock()
-_has_regeneration_of_providers_run = False
-
-UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
-    AIRFLOW_ROOT_PATH / "scripts" / "ci" / "prek" / "update_providers_dependencies.py"
-)
-
-
-def regenerate_provider_dependencies_once() -> None:
-    """Run provider dependencies regeneration once per interpreter execution.
-
-    This function is safe to call multiple times from different modules; the
-    underlying command will only run once. If the underlying command fails, the
-    CalledProcessError is propagated to the caller.
-    """
-    global _has_regeneration_of_providers_run
-    with _regenerate_provider_deps_lock:
-        if _has_regeneration_of_providers_run:
-            return
-        # Run the regeneration command from the repository root to ensure correct
-        # relative paths if the script expects to be run from AIRFLOW_ROOT_PATH.
-        subprocess.check_call(
-            ["uv", "run", UPDATE_PROVIDER_DEPENDENCIES_SCRIPT.as_posix()], cwd=AIRFLOW_ROOT_PATH
-        )
-        _has_regeneration_of_providers_run = True
-
-
-def _calculate_provider_deps_hash():
-    import hashlib
-
-    hasher = hashlib.sha256()
-    for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
-        hasher.update(file.read_bytes())
-    return hasher.hexdigest()
-
-
-def _run_provider_dependencies_generation(calculated_hash=None) -> dict:
-    if calculated_hash is None:
-        calculated_hash = _calculate_provider_deps_hash()
-    AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH.write_text(calculated_hash)
-    # We use regular print here as the rich console might not be initialized yet
-    print("Regenerating provider dependencies file")
-    regenerate_provider_dependencies_once()
-    return json.loads(AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH.read_text())
-
-
-if not AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH.exists():
-    PROVIDER_DEPENDENCIES = _run_provider_dependencies_generation()
-else:
-    PROVIDER_DEPENDENCIES = json.loads(AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH.read_text())
-
-
-def generate_provider_dependencies_if_needed():
-    regenerate_provider_dependencies = False
-    if (
-        not AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH.exists()
-        or not AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH.exists()
-    ):
-        regenerate_provider_dependencies = True
-        calculated_hash = _calculate_provider_deps_hash()
-    else:
-        calculated_hash = _calculate_provider_deps_hash()
-        if calculated_hash.strip() != AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH.read_text().strip():
-            regenerate_provider_dependencies = True
-    if regenerate_provider_dependencies:
-        global PROVIDER_DEPENDENCIES
-        PROVIDER_DEPENDENCIES = _run_provider_dependencies_generation(calculated_hash)
-
-
 DEVEL_DEPS_PATH = AIRFLOW_ROOT_PATH / "generated" / "devel_deps.txt"
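Everything deleted above resurfaces, per the new imports in the other files, in airflow_breeze.utils.provider_dependencies, with the module-level PROVIDER_DEPENDENCIES constant replaced by a get_provider_dependencies() function. That module is not part of this diff, so the following is only a sketch of the shape the call sites imply, reconstructed from the deleted code and the two path constants added to path_utils; the exact bodies, and the simplified glob-based hash, are assumptions:

import hashlib
import json
import subprocess
from threading import Lock

from airflow_breeze.utils.path_utils import (
    AIRFLOW_ROOT_PATH,
    PROVIDER_DEPENDENCIES_JSON_HASH_PATH,
    PROVIDER_DEPENDENCIES_JSON_PATH,
)

_lock = Lock()
_has_run = False


def regenerate_provider_dependencies_once() -> None:
    """Run the regeneration script at most once per interpreter (logic carried over from the deleted code)."""
    global _has_run
    with _lock:
        if _has_run:
            return
        script = AIRFLOW_ROOT_PATH / "scripts" / "ci" / "prek" / "update_providers_dependencies.py"
        subprocess.check_call(["uv", "run", script.as_posix()], cwd=AIRFLOW_ROOT_PATH)
        _has_run = True


def _calculate_provider_deps_hash() -> str:
    # The deleted implementation walked the [tool.uv.workspace] members of the root
    # pyproject.toml; globbing the provider files directly is a simplification here.
    hasher = hashlib.sha256()
    for file in sorted(AIRFLOW_ROOT_PATH.glob("providers/**/pyproject.toml")) + sorted(
        AIRFLOW_ROOT_PATH.glob("providers/**/provider.yaml")
    ):
        hasher.update(file.read_bytes())
    return hasher.hexdigest()


def generate_provider_dependencies_if_needed() -> None:
    # Same staleness rule as the deleted global_constants version: regenerate when
    # the JSON or its recorded sha256 is missing, or the recorded hash is stale.
    calculated = _calculate_provider_deps_hash()
    if (
        not PROVIDER_DEPENDENCIES_JSON_PATH.exists()
        or not PROVIDER_DEPENDENCIES_JSON_HASH_PATH.exists()
        or calculated != PROVIDER_DEPENDENCIES_JSON_HASH_PATH.read_text().strip()
    ):
        PROVIDER_DEPENDENCIES_JSON_HASH_PATH.write_text(calculated)
        regenerate_provider_dependencies_once()


def get_provider_dependencies() -> dict:
    # Called lazily from every former PROVIDER_DEPENDENCIES site; nothing runs at import time.
    generate_provider_dependencies_if_needed()
    return json.loads(PROVIDER_DEPENDENCIES_JSON_PATH.read_text())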
dev/breeze/src/airflow_breeze/utils/md5_build_check.py (2 additions, 5 deletions)
@@ -25,13 +25,10 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from airflow_breeze.global_constants import (
-    ALL_PYPROJECT_TOML_FILES,
-    FILES_FOR_REBUILD_CHECK,
-    regenerate_provider_dependencies_once,
-)
+from airflow_breeze.global_constants import ALL_PYPROJECT_TOML_FILES, FILES_FOR_REBUILD_CHECK
 from airflow_breeze.utils.console import get_console
 from airflow_breeze.utils.path_utils import AIRFLOW_ROOT_PATH
+from airflow_breeze.utils.provider_dependencies import regenerate_provider_dependencies_once
 from airflow_breeze.utils.shared_options import get_verbose
 
 if TYPE_CHECKING:
dev/breeze/src/airflow_breeze/utils/packages.py (15 additions, 5 deletions)
@@ -35,7 +35,6 @@
 from airflow_breeze.global_constants import (
     ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS,
     DEFAULT_PYTHON_MAJOR_MINOR_VERSION,
-    PROVIDER_DEPENDENCIES,
     PROVIDER_RUNTIME_DATA_SCHEMA_PATH,
     REGULAR_DOC_PACKAGES,
 )
@@ -47,7 +46,6 @@
     BREEZE_SOURCES_PATH,
     DOCS_ROOT,
     PREVIOUS_AIRFLOW_PROVIDERS_NS_PACKAGE_PATH,
-    PROVIDER_DEPENDENCIES_JSON_PATH,
 )
 from airflow_breeze.utils.publish_docs_helpers import (
     PROVIDER_DATA_SCHEMA_PATH,
@@ -317,7 +315,10 @@ def get_available_distributions(
     :param include_all_providers: whether "all-providers" should be included in the list.
 
     """
-    provider_dependencies = json.loads(PROVIDER_DEPENDENCIES_JSON_PATH.read_text())
+    # Need lazy import to prevent circular dependencies
+    from airflow_breeze.utils.provider_dependencies import get_provider_dependencies
+
+    provider_dependencies = get_provider_dependencies()
 
     valid_states = set()
     if include_not_ready:
@@ -657,7 +658,11 @@ def convert_optional_dependencies_to_table(
 def get_cross_provider_dependent_packages(provider_id: str) -> list[str]:
     if provider_id in get_removed_provider_ids():
         return []
-    return PROVIDER_DEPENDENCIES[provider_id]["cross-providers-deps"]
+
+    # Need lazy import to prevent circular dependencies
+    from airflow_breeze.utils.provider_dependencies import get_provider_dependencies
+
+    return get_provider_dependencies()[provider_id]["cross-providers-deps"]
 
 
 def get_license_files(provider_id: str) -> str:
@@ -859,6 +864,9 @@ def get_latest_provider_tag(provider_id: str, suffix: str) -> str:
 def regenerate_pyproject_toml(
     context: dict[str, Any], provider_details: ProviderPackageDetails, version_suffix: str | None
 ):
+    # Need lazy import to prevent circular dependencies
+    from airflow_breeze.utils.provider_dependencies import get_provider_dependencies
+
     get_pyproject_toml_path = provider_details.root_provider_path / "pyproject.toml"
     # we want to preserve comments in dependencies - both required and additional,
     # so we should not really parse the toml file but extract dependencies "as is" in text form and pass
@@ -919,7 +927,9 @@ def regenerate_pyproject_toml(
context["AIRFLOW_DOC_URL"] = (
"https://airflow.staged.apache.org" if version_suffix else "https://airflow.apache.org"
)
cross_provider_ids = set(PROVIDER_DEPENDENCIES.get(provider_details.provider_id)["cross-providers-deps"])
cross_provider_ids = set(
get_provider_dependencies()[provider_details.provider_id]["cross-providers-deps"]
)
cross_provider_dependencies = []
# Add cross-provider dependencies to the optional dependencies if they are missing
for provider_id in sorted(cross_provider_ids):
Expand Down
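The comment repeated through this file ("Need lazy import to prevent circular dependencies") is the standard fix for an import cycle: packages.py and provider_dependencies.py need each other, and deferring one of the imports from module scope into the function body breaks the cycle. A minimal illustration with two hypothetical modules (not Breeze code):

# alpha.py
from beta import shout  # module-level import: beta must be fully loadable first

def name() -> str:
    return "alpha"

# beta.py
def shout() -> str:
    # Deferred import: by the time shout() is actually called, alpha has
    # finished importing, so the alpha -> beta -> alpha cycle never trips.
    from alpha import name
    return name().upper()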
dev/breeze/src/airflow_breeze/utils/path_utils.py (1 addition)
@@ -272,6 +272,7 @@ def find_airflow_root_path_to_operate_on() -> Path:
GENERATED_PATH = AIRFLOW_ROOT_PATH / "generated"
CONSTRAINTS_CACHE_PATH = BUILD_CACHE_PATH / "constraints"
PROVIDER_DEPENDENCIES_JSON_PATH = GENERATED_PATH / "provider_dependencies.json"
PROVIDER_DEPENDENCIES_JSON_HASH_PATH = GENERATED_PATH / "provider_dependencies.json.sha256sum"
PROVIDER_METADATA_JSON_PATH = GENERATED_PATH / "provider_metadata.json"
UI_CACHE_PATH = BUILD_CACHE_PATH / "ui"
AIRFLOW_TMP_PATH = AIRFLOW_ROOT_PATH / "tmp"
Expand Down