Move Celery provider to new provider code structure (apache#45786)
This also solves quite a few teething issues with the providers move:

* Boring-Cyborg now needs only a one-liner for new providers
* Additional-Extras in a new provider are moved to "optional-dependencies"
  in pyproject.toml
* generated pyproject.toml in new providers now has better comments
  explaining that dependencies can be edited in place even though the file
  is generated
* comments in dependencies are preserved when regenerating the
  pyproject.toml files
* missing cross-provider dependencies are added as optional extras
  automatically when regenerating pyproject.toml
* LICENSE file for providers does not contain the additional projects
  that are included in the Airflow LICENSE
* Provider package tests do not have `__init__.py` in the `providers`
  package - this way the `celery` import does not import the celery
  package from tests.providers
* When preparing a package with --version-suffix-for-pypi, the
  pyproject.toml is temporarily updated with the suffix added to airflow
  dependencies (see the sketch after this list)
* various constants in breeze code referring to providers were moved
  to path_utils and made consistent
* in case the generated pyproject.toml file with the PyPI suffix has an
  error, its content is printed when generating the package. The content
  of pyproject.toml is also printed when the --verbose flag is used.
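The suffix handling follows an "apply, build, restore" pattern. Below is a minimal sketch of that flow; the helper names and the regex are assumptions for illustration only, not the actual breeze implementation (which lives in `apply_version_suffix_to_pyproject_toml` / `restore_pyproject_toml`, shown in the diff further down):

```python
# Minimal sketch (assumed names) of the temporary version-suffix flow:
# rewrite apache-airflow dependencies in pyproject.toml, build, then restore.
import re
from pathlib import Path


def apply_suffix_to_pyproject(provider_root: Path, suffix: str) -> str:
    """Append the suffix to apache-airflow minimum versions; return the original text."""
    pyproject = provider_root / "pyproject.toml"
    original = pyproject.read_text()
    # e.g. '"apache-airflow>=2.9.0"' -> '"apache-airflow>=2.9.0rc1"'
    patched = re.sub(
        r'("apache-airflow(\[[^\]]*\])?>=\d+(\.\d+)*)"',
        lambda m: f'{m.group(1)}{suffix}"',
        original,
    )
    pyproject.write_text(patched)
    return original


def restore_pyproject(provider_root: Path, original: str) -> None:
    (provider_root / "pyproject.toml").write_text(original)


def build_with_suffix(provider_root: Path, suffix: str) -> None:
    original = apply_suffix_to_pyproject(provider_root, suffix)
    try:
        pass  # run the real package build here
    finally:
        # always restore pyproject.toml, even when the build fails
        restore_pyproject(provider_root, original)
```

This mirrors the try/finally structure added in the release-management command below, which guarantees the original pyproject.toml is restored even when building the package raises.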
potiuk authored Jan 20, 2025
1 parent 08d0273 commit 25aeb11
Showing 81 changed files with 1,261 additions and 318 deletions.
10 changes: 3 additions & 7 deletions .github/boring-cyborg.yml
@@ -19,9 +19,7 @@

labelPRBasedOnFilePath:
provider:airbyte:
- providers/airbyte/src/airflow/providers/airbyte/**/*
- providers/airbyte/docs/**/*
- providers/airbyte/tests/**/*
- providers/airbyte/**

provider:alibaba:
- providers/src/airflow/providers/alibaba/**/*
@@ -144,9 +142,7 @@ labelPRBasedOnFilePath:
- providers/tests/atlassian/jira/**/*

provider:celery:
- providers/src/airflow/providers/celery/**/*
- docs/apache-airflow-providers-celery/**/*
- providers/tests/celery/**/*
- providers/celery/**

provider:cloudant:
- providers/src/airflow/providers/cloudant/**/*
@@ -157,7 +153,7 @@
- airflow/example_dags/example_kubernetes_executor.py
- airflow/example_dags/example_local_kubernetes_executor.py
- providers/src/airflow/providers/cncf/kubernetes/**/*
- providers/src/airflow/providers/celery/executors/celery_kubernetes_executor.py
- providers/celery/src/airflow/providers/celery/executors/celery_kubernetes_executor.py
- docs/apache-airflow-providers-cncf-kubernetes/**/*
- kubernetes_tests/**/*
- providers/tests/cncf/kubernetes/**/*
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -320,7 +320,7 @@ repos:
exclude: material-icons\.css$|^images/.*$|^RELEASE_NOTES\.txt$|^.*package-lock\.json$|^.*/kinglear\.txt$|^.*pnpm-lock\.yaml$
args:
- --ignore-words=docs/spelling_wordlist.txt
- --skip=providers/src/airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md
- --skip=providers/src/airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,providers/*/docs/commits.rst,providers/*/*/docs/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md
- --exclude-file=.codespellignorelines
- repo: https://github.com/woodruffw/zizmor-pre-commit
rev: v1.0.0
@@ -349,7 +349,7 @@ repos:
files: ^providers/src/airflow/providers/.*/(operators|transfers|sensors)/.*\.py$
additional_dependencies: [ 'rich>=12.4.4' ]
- id: update-providers-build-files
name: Update providers build files files
name: Update providers build files
entry: ./scripts/ci/pre_commit/update_providers_build_files.py
language: python
pass_filenames: true
24 changes: 0 additions & 24 deletions airflow/new_provider.yaml.schema.json
@@ -334,30 +334,6 @@
"type": "string"
}
},
"additional-extras": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Name of the extra",
"type": "string"
},
"dependencies": {
"description": "Dependencies that should be added for the extra",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"name",
"dependencies"
]
},
"description": "Additional extras that the provider should have. Replaces auto-generated cross-provider extras, if matching the same prefix, so that you can specify boundaries for existing dependencies."
},
"task-decorators": {
"type": "array",
"description": "Decorators to use with the TaskFlow API. Can be accessed by users via '@task.<name>'",
2 changes: 1 addition & 1 deletion contributing-docs/08_static_code_checks.rst
@@ -382,7 +382,7 @@ require Breeze Docker image to be built locally.
+-----------------------------------------------------------+--------------------------------------------------------+---------+
| update-openapi-spec-tags-to-be-sorted | Sort alphabetically openapi spec tags | |
+-----------------------------------------------------------+--------------------------------------------------------+---------+
| update-providers-build-files | Update providers build files files | |
| update-providers-build-files | Update providers build files | |
+-----------------------------------------------------------+--------------------------------------------------------+---------+
| update-providers-dependencies | Update dependencies for provider packages | |
+-----------------------------------------------------------+--------------------------------------------------------+---------+
1 change: 1 addition & 0 deletions contributing-docs/11_provider_packages.rst
@@ -143,6 +143,7 @@ of Airflow).
The old ``provider.yaml`` file is compliant with the schema that is available in
`json-schema specification <https://github.com/apache/airflow/blob/main/airflow/provider.yaml.schema.json>`_.

# TODO(potiuk) - rename when all providers are new-style
The new ``provider.yaml`` file is compliant with the new schema that is available in
`json-schema specification <https://github.com/apache/airflow/blob/main/airflow/new_provider.yaml.schema.json>`_.

Original file line number Diff line number Diff line change
@@ -97,12 +97,14 @@
PrepareReleasePackageErrorBuildingPackageException,
PrepareReleasePackageTagExistException,
PrepareReleasePackageWrongSetupException,
apply_version_suffix_to_pyproject_toml,
build_provider_package,
cleanup_build_remnants,
copy_provider_sources_to_target,
generate_build_files,
get_packages_list_to_act_on,
move_built_packages_and_cleanup,
restore_pyproject_toml,
should_skip_the_package,
)
from airflow_breeze.utils.add_back_references import (
@@ -157,9 +159,9 @@
)
from airflow_breeze.utils.shared_options import get_dry_run, get_verbose
from airflow_breeze.utils.version_utils import (
create_package_version,
get_latest_airflow_version,
get_latest_helm_chart_version,
get_package_version_suffix,
is_local_package_version,
)
from airflow_breeze.utils.versions import is_pre_release
@@ -948,8 +950,8 @@ def prepare_provider_packages(
include_removed=include_removed_providers,
include_not_ready=include_not_ready_providers,
)
package_version = create_package_version(version_suffix_for_pypi, version_suffix_for_local)
if not skip_tag_check and not is_local_package_version(package_version):
package_version_suffix = get_package_version_suffix(version_suffix_for_pypi, version_suffix_for_local)
if not skip_tag_check and not is_local_package_version(package_version_suffix):
run_command(["git", "remote", "rm", "apache-https-for-providers"], check=False, stderr=DEVNULL)
make_sure_remote_apache_exists_and_fetch(github_repository=github_repository)
success_packages = []
@@ -965,24 +967,32 @@
try:
basic_provider_checks(provider_id)
if not skip_tag_check:
should_skip, package_version = should_skip_the_package(provider_id, package_version)
should_skip, package_version_suffix = should_skip_the_package(
provider_id, package_version_suffix
)
if should_skip:
continue
get_console().print()
with ci_group(f"Preparing provider package [special]{provider_id}"):
get_console().print()
# TODO(potiuk) - rename when all providers are new-style
new_provider_root_dir = AIRFLOW_PROVIDERS_DIR.joinpath(*provider_id.split("."))
if (new_provider_root_dir / "provider.yaml").exists():
get_console().print(
f"[info]Provider {provider_id} is a new-style provider. "
f"Skipping package generation as it is not needed for new-style providers."
f"[info]Provider {provider_id} is a new-style provider building in-place."
)
cleanup_build_remnants(new_provider_root_dir)
build_provider_package(
provider_id=provider_id,
package_format=package_format,
target_provider_root_sources_path=new_provider_root_dir,
old_content = apply_version_suffix_to_pyproject_toml(
provider_id, new_provider_root_dir, package_version_suffix
)
try:
build_provider_package(
provider_id=provider_id,
package_format=package_format,
target_provider_root_sources_path=new_provider_root_dir,
)
finally:
restore_pyproject_toml(new_provider_root_dir, old_content)
move_built_packages_and_cleanup(
new_provider_root_dir,
DIST_DIR,
@@ -994,7 +1004,7 @@
target_provider_root_sources_path = copy_provider_sources_to_target(provider_id)
generate_build_files(
provider_id=provider_id,
version_suffix=package_version,
version_suffix=package_version_suffix,
target_provider_root_sources_path=target_provider_root_sources_path,
)
cleanup_build_remnants(target_provider_root_sources_path)
@@ -1838,6 +1848,7 @@ def _add_chicken_egg_providers_to_build_args(
python_build_args["DOCKER_CONTEXT_FILES"] = "./docker-context-files"


# TODO(potiuk) - remove when all providers are new-style
@release_management.command(
name="clean-old-provider-artifacts",
help="Cleans the old provider artifacts",
8 changes: 2 additions & 6 deletions dev/breeze/src/airflow_breeze/commands/setup_commands.py
@@ -50,6 +50,8 @@
from airflow_breeze.utils.docker_command_utils import VOLUMES_FOR_SELECTED_MOUNTS
from airflow_breeze.utils.path_utils import (
AIRFLOW_SOURCES_ROOT,
BREEZE_IMAGES_DIR,
BREEZE_SOURCES_DIR,
SCRIPTS_CI_DOCKER_COMPOSE_LOCAL_YAML_FILE,
get_installation_airflow_sources,
get_installation_sources_config_metadata_hash,
@@ -469,12 +471,6 @@ def backup(script_path_file: Path):
shutil.copy(str(script_path_file), str(script_path_file) + ".bak")


BREEZE_INSTALL_DIR = AIRFLOW_SOURCES_ROOT / "dev" / "breeze"
BREEZE_DOC_DIR = BREEZE_INSTALL_DIR / "doc"
BREEZE_IMAGES_DIR = BREEZE_DOC_DIR / "images"
BREEZE_SOURCES_DIR = BREEZE_INSTALL_DIR / "src"


def get_old_command_hash() -> dict[str, str]:
command_hash = {}
for file in BREEZE_IMAGES_DIR.glob("output_*.txt"):
Original file line number Diff line number Diff line change
@@ -36,22 +36,22 @@
import semver
from rich.syntax import Syntax

from airflow_breeze.global_constants import PROVIDER_DEPENDENCIES
from airflow_breeze.utils.black_utils import black_format
from airflow_breeze.utils.confirm import Answer, user_confirm
from airflow_breeze.utils.console import get_console
from airflow_breeze.utils.packages import (
HTTPS_REMOTE,
ProviderPackageDetails,
clear_cache_for_provider_metadata,
get_old_source_providers_package_path,
get_pip_package_name,
get_provider_details,
get_provider_jinja_context,
get_provider_yaml,
refresh_provider_metadata_from_yaml_file,
refresh_provider_metadata_with_provider_id,
render_template,
)
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, BREEZE_SOURCES_DIR
from airflow_breeze.utils.run_utils import run_command
from airflow_breeze.utils.shared_options import get_verbose
from airflow_breeze.utils.versions import get_version_tag
@@ -527,10 +527,10 @@ def _update_version_in_provider_yaml(
v = v.bump_patch()
provider_yaml_path, is_new_structure = get_provider_yaml(provider_id)
original_provider_yaml_content = provider_yaml_path.read_text()
new_provider_yaml_content = re.sub(
updated_provider_yaml_content = re.sub(
r"^versions:", f"versions:\n - {v}", original_provider_yaml_content, 1, re.MULTILINE
)
provider_yaml_path.write_text(new_provider_yaml_content)
provider_yaml_path.write_text(updated_provider_yaml_content)
get_console().print(f"[special]Bumped version to {v}\n")
return with_breaking_changes, maybe_with_new_features, original_provider_yaml_content

@@ -550,7 +550,7 @@ def _update_source_date_epoch_in_provider_yaml(
r"source-date-epoch: [0-9]*", f"source-date-epoch: {source_date_epoch}", original_text, 1
)
provider_yaml_path.write_text(new_text)
refresh_provider_metadata_with_provider_id(provider_id)
refresh_provider_metadata_from_yaml_file(provider_yaml_path)
get_console().print(f"[special]Updated source-date-epoch to {source_date_epoch}\n")


@@ -845,13 +845,12 @@ def update_release_notes(
else:
answer = Answer.YES

provider_yaml_path, is_new_structure = get_provider_yaml(provider_package_id)
if answer == Answer.NO:
if original_provider_yaml_content is not None:
# Restore original content of the provider.yaml
(get_old_source_providers_package_path(provider_package_id) / "provider.yaml").write_text(
original_provider_yaml_content
)
clear_cache_for_provider_metadata(provider_package_id)
provider_yaml_path.write_text(original_provider_yaml_content)
clear_cache_for_provider_metadata(provider_yaml_path=provider_yaml_path)

type_of_change = _ask_the_user_for_the_type_of_changes(non_interactive=False)
if type_of_change == TypeOfChange.SKIP:
@@ -1132,7 +1131,7 @@ def update_changelog(
_update_index_rst(jinja_context, package_id, provider_details.documentation_provider_package_path)


def _generate_get_provider_info_py(context, provider_details):
def _generate_get_provider_info_py(context: dict[str, Any], provider_details: ProviderPackageDetails):
get_provider_info_content = black_format(
render_template(
template_name="get_provider_info",
@@ -1149,7 +1148,7 @@ def _generate_get_provider_info_py(context, provider_details):
)


def _generate_readme_rst(context, provider_details):
def _generate_readme_rst(context: dict[str, Any], provider_details: ProviderPackageDetails):
get_provider_readme_content = render_template(
template_name="PROVIDER_README",
context=context,
@@ -1163,16 +1162,49 @@ def _generate_readme_rst(context, provider_details):
)


def _generate_pyproject(context, provider_details):
def _regenerate_pyproject_toml(context: dict[str, Any], provider_details: ProviderPackageDetails):
get_pyproject_toml_path = provider_details.root_provider_path / "pyproject.toml"
try:
import tomllib
except ImportError:
import tomli as tomllib
old_toml_content = tomllib.loads(get_pyproject_toml_path.read_text())
old_dependencies = old_toml_content["project"]["dependencies"]
install_requirements = "".join(f'\n "{ir}",' for ir in old_dependencies)
context["INSTALL_REQUIREMENTS"] = install_requirements
# we want to preserve comments in dependencies - both required and additional,
# so we should not really parse the toml file but extract dependencies "as is" in text form and pass
# them to context. While this is not "generic toml" perfect, for provider pyproject.toml files it is
# good enough, because we fully control the pyproject.toml content for providers as they are generated
# from our templates (Except the dependencies section that is manually updated)
pyproject_toml_content = get_pyproject_toml_path.read_text()
required_dependencies: list[str] = []
optional_dependencies: list[str] = []
in_required_dependencies = False
in_optional_dependencies = False
for line in pyproject_toml_content.splitlines():
if line == "dependencies = [":
in_required_dependencies = True
continue
if in_required_dependencies and line == "]":
in_required_dependencies = False
continue
if line == "[project.optional-dependencies]":
in_optional_dependencies = True
continue
if in_optional_dependencies and line == "":
in_optional_dependencies = False
continue
if in_required_dependencies:
required_dependencies.append(line)
if in_optional_dependencies:
optional_dependencies.append(line)

# For additional providers we want to load the dependencies and see if cross-provider-dependencies are
# present and if not, add them to the optional dependencies

context["INSTALL_REQUIREMENTS"] = "\n".join(required_dependencies)

# Add cross-provider dependencies to the optional dependencies if they are missing
for module in PROVIDER_DEPENDENCIES.get(provider_details.provider_id)["cross-providers-deps"]:
if f'"{module}" = [' not in optional_dependencies:
optional_dependencies.append(f'"{module}" = [')
optional_dependencies.append(f' "{get_pip_package_name(module)}"')
optional_dependencies.append("]")
context["EXTRAS_REQUIREMENTS"] = "\n".join(optional_dependencies)

get_pyproject_toml_content = render_template(
template_name="pyproject",
context=context,
@@ -1202,17 +1234,19 @@ def _generate_build_files_for_provider(
init_py_path.write_text(init_py_content)
# TODO(potiuk) - remove this if when we move all providers to new structure
if provider_details.is_new_structure:
_generate_get_provider_info_py(context, provider_details)
_generate_readme_rst(context, provider_details)
_generate_pyproject(context, provider_details)
shutil.copy(AIRFLOW_SOURCES_ROOT / "LICENSE", provider_details.base_provider_package_path / "LICENSE")
_regenerate_pyproject_toml(context, provider_details)
_generate_get_provider_info_py(context, provider_details)
shutil.copy(
BREEZE_SOURCES_DIR / "airflow_breeze" / "templates" / "PROVIDER_LICENSE.txt",
provider_details.base_provider_package_path / "LICENSE",
)


def _replace_min_airflow_version_in_provider_yaml(
context: dict[str, Any],
target_path: Path,
provider_yaml_path: Path,
):
provider_yaml_path = target_path / "provider.yaml"
provider_yaml_txt = provider_yaml_path.read_text()
provider_yaml_txt = re.sub(
r" {2}- apache-airflow>=.*",
Expand Down Expand Up @@ -1246,5 +1280,5 @@ def update_min_airflow_version_and_build_files(
provider_details=provider_details,
)
_replace_min_airflow_version_in_provider_yaml(
context=jinja_context, target_path=provider_details.root_provider_path
context=jinja_context, provider_yaml_path=provider_details.provider_yaml_path
)