Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations

try:
from airflow.dag_processing.bundles.base import get_bundle_permissions # type: ignore[attr-defined]
except ImportError:
# Compatibility for Airflow < 3.2.0
def get_bundle_permissions():
return 0o755, 0o600 # Directories can be listed and entered, but file contents are inaccessible
2 changes: 1 addition & 1 deletion providers/git/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ requires-python = ">=3.10"
# After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build``
dependencies = [
"apache-airflow>=3.0.0",
"apache-airflow-providers-common-compat>=1.10.1",
"apache-airflow-providers-common-compat>=1.10.1", # use next version
"GitPython>=3.1.44",
]

Expand Down
65 changes: 64 additions & 1 deletion providers/git/src/airflow/providers/git/bundles/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

import os
import shutil
from contextlib import nullcontext
import subprocess
from contextlib import nullcontext, suppress
from pathlib import Path
from urllib.parse import urlparse

Expand All @@ -28,12 +29,64 @@
from tenacity import retry, retry_if_exception_type, stop_after_attempt

from airflow.dag_processing.bundles.base import BaseDagBundle
from airflow.providers.common.compat.bundles import get_bundle_permissions
from airflow.providers.common.compat.sdk import AirflowException
from airflow.providers.git.hooks.git import GitHook

log = structlog.get_logger(__name__)


def _apply_permissions_recursively(path: Path) -> None:
"""
Apply configured bundle permissions to a directory tree.

This ensures that when user impersonation is used, the impersonated user
can access the cloned repository files. Permissions are applied at clone
time regardless of whether all or only some tasks use run_as_user, because:
1. DAG parsing needs access before task execution
2. Bundles may serve multiple DAGs with different impersonation settings
3. Applying permissions upfront provides a consistent security model

:param path: The root path to apply permissions to recursively
"""
folder_perms, file_perms = get_bundle_permissions()
# While individual chmod failures are suppressed, os.walk errors are more serious - so they're raised.
for root, dirs, files in os.walk(path):
root_path = Path(root)
with suppress(OSError):
root_path.chmod(folder_perms)
for d in dirs:
with suppress(OSError):
(root_path / d).chmod(folder_perms)
for f in files:
with suppress(OSError):
(root_path / f).chmod(file_perms)


def _configure_git_safe_directory(path: Path) -> None:
"""
Add path to git safe.directory to allow cross-user access.

Git 2.35.2+ refuses to operate on repositories owned by different users without explicit safe directory
configuration. This is needed when using user impersonation (run_as_user) where the repository is created by one
user but accessed by another.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the run_as_user is configured in only one task?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is perhaps slightly more permissive than strictly necessary. However, even if only one task uses run_as_user, the repository still needs to be accessible during DAG parsing, and having consistent permissions simplifies the security model.

How about I add the following docstring explaining this design decision?

def _apply_permissions_recursively(path: Path) -> None:
    """
    Apply configured bundle permissions to a directory tree.
    
    This ensures that when user impersonation is used, the impersonated user
    can access the cloned repository files.  Permissions are applied at clone
    time regardless of whether all or only some tasks use run_as_user, because: 
    1. DAG parsing needs access before task execution
    2. Bundles may serve multiple DAGs with different impersonation settings
    3. Applying permissions upfront provides a consistent security model
    
    :param path: The root path to apply permissions to recursively
    """


Note: This modifies the global git config (~/.gitconfig) because git's `safe.directory` setting must be in global
or system config to take effect. Repo-local config is ignored for security reasons.

:param path: The repository path to add as a safe directory
"""
try:
subprocess.run(
["git", "config", "--global", "--add", "safe.directory", str(path)],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m not sure if this is a good idea… I’d not expect my global Git config to be silently changed. Can this be repo-local instead?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, why does this need to use a subprocess instead of the GitPython API?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Why global: I believe repository-local config is ignored for security reasons (otherwise malicious repos could whitelist themselves). Airflow typically runs in controlled environments (containers/VMs), modifying the global git config for the airflow user should be acceptable.
  • Why not GitPython:
    1. We need to configure safe.directory before creating the Repo object (L234), because GitPython internally runs git commands during Repo initialization. If the ownership check fails at that point, we get an exception before we can configure anything.
    2. While GitPython has GitConfigParser that can write to any config file, using it for ~/.gitconfig would essentially reimplement git config --global with more code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another approach to consider:

If cloning a bare repository or a repository intended for multiple users (using git clone --shared), Git can be told to ignore the umask and use a specific scheme via the core.sharedRepository setting:

  • group (or true): Makes the repository group-writable (ignores umask for the group bits).
  • all (or world): Makes the repository readable by everyone.
  • 0xxx: An explicit octal mode (e.g., 0660).

check=False,
capture_output=True,
text=True,
)
except Exception as e:
log.debug("Could not configure git safe.directory for %s: %s", path, e)


class GitDagBundle(BaseDagBundle):
"""
git DAG bundle - exposes a git repository as a DAG bundle.
Expand Down Expand Up @@ -170,8 +223,13 @@ def _clone_repo_if_required(self) -> None:
url=self.bare_repo_path,
to_path=self.repo_path,
)
# Apply permissions for multi-user access (e.g., impersonation)
_apply_permissions_recursively(self.repo_path)
else:
self._log.debug("repo exists", repo_path=self.repo_path)

# Configure git safe.directory for cross-user access (git 2.35.2+)
_configure_git_safe_directory(self.repo_path)
self.repo = Repo(self.repo_path)
except NoSuchPathError as e:
# Protection should the bare repo be removed manually
Expand Down Expand Up @@ -204,6 +262,11 @@ def _clone_bare_repo_if_required(self) -> None:
bare=True,
env=self.hook.env if self.hook else None,
)
# Apply permissions for multi-user access (e.g., impersonation)
_apply_permissions_recursively(self.bare_repo_path)

# Configure git safe.directory for cross-user access (git 2.35.2+)
_configure_git_safe_directory(self.bare_repo_path)
self.bare_repo = Repo(self.bare_repo_path)

# Fetch to ensure we have latest refs and validate repo integrity
Expand Down
46 changes: 46 additions & 0 deletions providers/git/tests/unit/git/bundles/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,3 +976,49 @@ def test_submodule_fetch_error_raises_runtime_error(
bundle.initialize()

mock_rmtree.assert_not_called()

@mock.patch("airflow.providers.git.bundles.git.get_bundle_permissions")
@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_clone_applies_permissions(self, mock_githook, mock_get_perms, git_repo, bundle_temp_dir):
"""Test that cloning applies configured permissions to repository directories."""
import stat

repo_path, repo = git_repo
mock_githook.return_value.repo_url = repo_path
# Mock get_bundle_permissions to return the desired test values
mock_get_perms.return_value = (0o775, 0o664)

bundle = GitDagBundle(
name="test_perms",
git_conn_id=CONN_HTTPS,
tracking_ref=GIT_DEFAULT_BRANCH,
)
bundle.initialize()

# Check bare repo permissions
bare_mode = stat.S_IMODE(bundle.bare_repo_path.stat().st_mode)
assert bare_mode == 0o775

# Check tracking repo permissions
repo_mode = stat.S_IMODE(bundle.repo_path.stat().st_mode)
assert repo_mode == 0o775

@mock.patch("airflow.providers.git.bundles.git.GitHook")
@mock.patch("airflow.providers.git.bundles.git._configure_git_safe_directory")
def test_safe_directory_configured(self, mock_safe_dir, mock_githook, git_repo):
"""Test that git safe.directory is configured for cross-user access."""
repo_path, repo = git_repo
mock_githook.return_value.repo_url = repo_path

bundle = GitDagBundle(
name="test_safe",
git_conn_id=CONN_HTTPS,
tracking_ref=GIT_DEFAULT_BRANCH,
)
bundle.initialize()

# Verify safe.directory was called for both repos
calls = mock_safe_dir.call_args_list
assert len(calls) >= 2 # At least once for bare and once for version repo
paths_called = [str(call[0][0]) for call in calls]
assert any("bare" in p for p in paths_called)
Loading