-
Notifications
You must be signed in to change notification settings - Fork 16.4k
Apply configured bundle permissions to cloned repos for impersonation #60280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| try: | ||
| from airflow.dag_processing.bundles.base import get_bundle_permissions # type: ignore[attr-defined] | ||
| except ImportError: | ||
| # Compatibility for Airflow < 3.2.0 | ||
| def get_bundle_permissions(): | ||
| return 0o755, 0o600 # Directories can be listed and entered, but file contents are inaccessible |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,8 @@ | |
|
|
||
| import os | ||
| import shutil | ||
| from contextlib import nullcontext | ||
| import subprocess | ||
| from contextlib import nullcontext, suppress | ||
| from pathlib import Path | ||
| from urllib.parse import urlparse | ||
|
|
||
|
|
@@ -28,12 +29,64 @@ | |
| from tenacity import retry, retry_if_exception_type, stop_after_attempt | ||
|
|
||
| from airflow.dag_processing.bundles.base import BaseDagBundle | ||
| from airflow.providers.common.compat.bundles import get_bundle_permissions | ||
| from airflow.providers.common.compat.sdk import AirflowException | ||
| from airflow.providers.git.hooks.git import GitHook | ||
|
|
||
| log = structlog.get_logger(__name__) | ||
|
|
||
|
|
||
| def _apply_permissions_recursively(path: Path) -> None: | ||
| """ | ||
| Apply configured bundle permissions to a directory tree. | ||
|
|
||
| This ensures that when user impersonation is used, the impersonated user | ||
| can access the cloned repository files. Permissions are applied at clone | ||
| time regardless of whether all or only some tasks use run_as_user, because: | ||
| 1. DAG parsing needs access before task execution | ||
| 2. Bundles may serve multiple DAGs with different impersonation settings | ||
| 3. Applying permissions upfront provides a consistent security model | ||
|
|
||
| :param path: The root path to apply permissions to recursively | ||
| """ | ||
| folder_perms, file_perms = get_bundle_permissions() | ||
| # While individual chmod failures are suppressed, os.walk errors are more serious - so they're raised. | ||
| for root, dirs, files in os.walk(path): | ||
| root_path = Path(root) | ||
| with suppress(OSError): | ||
| root_path.chmod(folder_perms) | ||
| for d in dirs: | ||
| with suppress(OSError): | ||
| (root_path / d).chmod(folder_perms) | ||
| for f in files: | ||
| with suppress(OSError): | ||
| (root_path / f).chmod(file_perms) | ||
|
|
||
|
|
||
| def _configure_git_safe_directory(path: Path) -> None: | ||
| """ | ||
| Add path to git safe.directory to allow cross-user access. | ||
|
|
||
| Git 2.35.2+ refuses to operate on repositories owned by different users without explicit safe directory | ||
| configuration. This is needed when using user impersonation (run_as_user) where the repository is created by one | ||
| user but accessed by another. | ||
|
|
||
| Note: This modifies the global git config (~/.gitconfig) because git's `safe.directory` setting must be in global | ||
| or system config to take effect. Repo-local config is ignored for security reasons. | ||
|
|
||
| :param path: The repository path to add as a safe directory | ||
| """ | ||
| try: | ||
| subprocess.run( | ||
| ["git", "config", "--global", "--add", "safe.directory", str(path)], | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’m not sure if this is a good idea… I’d not expect my global Git config to be silently changed. Can this be repo-local instead?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, why does this need to use a subprocess instead of the GitPython API?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another approach to consider:
|
||
| check=False, | ||
| capture_output=True, | ||
| text=True, | ||
| ) | ||
| except Exception as e: | ||
| log.debug("Could not configure git safe.directory for %s: %s", path, e) | ||
|
|
||
|
|
||
| class GitDagBundle(BaseDagBundle): | ||
| """ | ||
| git DAG bundle - exposes a git repository as a DAG bundle. | ||
|
|
@@ -170,8 +223,13 @@ def _clone_repo_if_required(self) -> None: | |
| url=self.bare_repo_path, | ||
| to_path=self.repo_path, | ||
| ) | ||
| # Apply permissions for multi-user access (e.g., impersonation) | ||
| _apply_permissions_recursively(self.repo_path) | ||
| else: | ||
| self._log.debug("repo exists", repo_path=self.repo_path) | ||
|
|
||
| # Configure git safe.directory for cross-user access (git 2.35.2+) | ||
| _configure_git_safe_directory(self.repo_path) | ||
| self.repo = Repo(self.repo_path) | ||
| except NoSuchPathError as e: | ||
| # Protection should the bare repo be removed manually | ||
|
|
@@ -204,6 +262,11 @@ def _clone_bare_repo_if_required(self) -> None: | |
| bare=True, | ||
| env=self.hook.env if self.hook else None, | ||
| ) | ||
| # Apply permissions for multi-user access (e.g., impersonation) | ||
| _apply_permissions_recursively(self.bare_repo_path) | ||
|
|
||
| # Configure git safe.directory for cross-user access (git 2.35.2+) | ||
| _configure_git_safe_directory(self.bare_repo_path) | ||
| self.bare_repo = Repo(self.bare_repo_path) | ||
|
|
||
| # Fetch to ensure we have latest refs and validate repo integrity | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if the run_as_user is configured in only one task?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is perhaps slightly more permissive than strictly necessary. However, even if only one task uses
run_as_user, the repository still needs to be accessible during DAG parsing, and having consistent permissions simplifies the security model.How about I add the following docstring explaining this design decision?