Skip to content

Commit

Permalink
Speed up fixing-ownership when leaving breeze on Linux (apache#45805)
Browse files Browse the repository at this point in the history
When leaving breeze on Linux, we fix the ownership of any newly
generated files so that they belong to the HOST user - because files
created in the container are owned by root.

However, this takes quite some time, especially when you have
.venv, node_modules or other folders with a large number of files.

This change skips such folders (and the files inside them) when fixing
ownership if the folders are already properly owned.
  • Loading branch information
potiuk authored Jan 20, 2025
1 parent 6d20d7b commit 24b1fe8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 20 deletions.
2 changes: 2 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/docker_command_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,8 @@ def fix_ownership_using_docker(quiet: bool = False):
"-e",
f"HOST_GROUP_ID={get_host_group_id()}",
"-e",
f"VERBOSE={str(get_verbose()).lower()}",
"-e",
f"DOCKER_IS_ROOTLESS={is_docker_rootless()}",
"--rm",
"-t",
Expand Down
54 changes: 34 additions & 20 deletions scripts/in_container/run_fix_ownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,41 @@ def change_ownership_of_files(path: Path) -> None:
sys.exit(1)
count_files = 0
root_uid = pwd.getpwnam("root").pw_uid
for file in path.rglob("*"):
try:
if file.is_symlink() and file.lstat().st_uid == root_uid:
# Change ownership of symlink itself (by default stat/chown follow the symlinks)
os.chown(file, int(host_user_id), int(host_group_id), follow_symlinks=False)
count_files += 1
skip_folders = {".venv", "node_modules", ".mypy_cache", ".pytest_cache", ".ruff_cache"}
for root, dirs, files in os.walk(path):
original_length = len(dirs)
original_dirs = dirs.copy()
# skip known big folders if they are not owned by root
dirs[:] = [d for d in dirs if d not in skip_folders or (Path(root) / d).stat().st_uid == root_uid]
new_length = len(dirs)
if new_length != original_length:
if os.environ.get("VERBOSE", "false") == "true":
print(
f"{root}: Skipped {original_length - new_length} "
f"folders: {set(original_dirs) - set(dirs)}"
)
for name in files:
file = Path(root) / name
try:
if file.is_symlink() and file.lstat().st_uid == root_uid:
# Change ownership of symlink itself (by default stat/chown follow the symlinks)
os.chown(file, int(host_user_id), int(host_group_id), follow_symlinks=False)
count_files += 1
if os.environ.get("VERBOSE_COMMANDS", "false") == "true":
print(f"Changed ownership of symlink {file}")
if file.stat().st_uid == root_uid:
# And here change ownership of the file (or if it is a symlink - the file it points to)
os.chown(file, int(host_user_id), int(host_group_id))
count_files += 1
if os.environ.get("VERBOSE_COMMANDS", "false") == "true":
print(f"Changed ownership of {file.resolve()}")
except FileNotFoundError:
# This is OK - file might have been deleted in the meantime or linked in Host from
# another place
if os.environ.get("VERBOSE_COMMANDS", "false") == "true":
print(f"Changed ownership of symlink {file}")
if file.stat().st_uid == root_uid:
# And here change ownership of the file (or if it is a symlink - the file it points to)
os.chown(file, int(host_user_id), int(host_group_id))
count_files += 1
if os.environ.get("VERBOSE_COMMANDS", "false") == "true":
print(f"Changed ownership of {file.resolve()}")
except FileNotFoundError:
# This is OK - file might have been deleted in the meantime or linked in Host from
# another place
if os.environ.get("VERBOSE_COMMANDS", "false") == "true":
print(f"Could not change ownership of {file}")
if count_files:
print(f"Changed ownership of {count_files} files back to {host_user_id}:{host_group_id}.")
print(f"Could not change ownership of {file}")
if count_files:
print(f"Changed ownership of {count_files} files back to {host_user_id}:{host_group_id}.")


if __name__ == "__main__":
Expand Down

0 comments on commit 24b1fe8

Please sign in to comment.