Skip to content

Commit

Permalink
[Flytekit] Support extra copy commands in ImageSpec (#2715)
Browse files Browse the repository at this point in the history
Signed-off-by: mao3267 <chenvincent610@gmail.com>
Signed-off-by: Kevin Su <pingsutw@apache.org>
Co-authored-by: Kevin Su <pingsutw@apache.org>
  • Loading branch information
mao3267 and pingsutw authored Sep 25, 2024
1 parent e60c152 commit 4e1ea68
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 36 deletions.
27 changes: 26 additions & 1 deletion flytekit/image_spec/default_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
$COPY_COMMAND_RUNTIME
RUN $RUN_COMMANDS
$EXTRA_COPY_CMDS
WORKDIR /root
SHELL ["/bin/bash", "-c"]
Expand Down Expand Up @@ -221,6 +222,28 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path):
else:
run_commands = ""

if image_spec.copy:
copy_commands = []
for src in image_spec.copy:
src_path = Path(src)

if src_path.is_absolute() or ".." in src_path.parts:
raise ValueError("Absolute paths or paths with '..' are not allowed in COPY command.")

dst_path = tmp_dir / src_path
dst_path.parent.mkdir(parents=True, exist_ok=True)

if src_path.is_dir():
shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.as_posix()}/")
else:
shutil.copy(src_path, dst_path)
copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.parent.as_posix()}/")

extra_copy_cmds = "\n".join(copy_commands)
else:
extra_copy_cmds = ""

docker_content = DOCKER_FILE_TEMPLATE.substitute(
PYTHON_VERSION=python_version,
UV_PYTHON_INSTALL_COMMAND=uv_python_install_command,
Expand All @@ -232,6 +255,7 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path):
COPY_COMMAND_RUNTIME=copy_command_runtime,
ENTRYPOINT=entrypoint,
RUN_COMMANDS=run_commands,
EXTRA_COPY_CMDS=extra_copy_cmds,
)

dockerfile_path = tmp_dir / "Dockerfile"
Expand All @@ -247,7 +271,7 @@ class DefaultImageBuilder(ImageSpecBuilder):
"python_version",
"builder",
"source_root",
"copy",
"source_copy_mode",
"env",
"registry",
"packages",
Expand All @@ -263,6 +287,7 @@ class DefaultImageBuilder(ImageSpecBuilder):
"pip_extra_index_url",
# "registry_config",
"commands",
"copy",
}

def build_image(self, image_spec: ImageSpec) -> str:
Expand Down
14 changes: 14 additions & 0 deletions flytekit/image_spec/image_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class ImageSpec:
Python files into the image.
If the option is set by the user, then that option is of course used.
copy: List of files/directories to copy to /root. e.g. ["src/file1.txt", "src/file2.txt"]
"""

name: str = "flytekit"
Expand All @@ -84,6 +85,7 @@ class ImageSpec:
commands: Optional[List[str]] = None
tag_format: Optional[str] = None
source_copy_mode: Optional[CopyFileDetection] = None
copy: Optional[List[str]] = None

def __post_init__(self):
self.name = self.name.lower()
Expand Down Expand Up @@ -179,6 +181,12 @@ def tag(self) -> str:
# shortcut to represent all the files.
spec = dataclasses.replace(spec, source_root=ls_digest)

if self.copy:
from flytekit.tools.fast_registration import compute_digest

digest = compute_digest(self.copy, None)
spec = dataclasses.replace(spec, copy=digest)

if spec.requirements:
requirements = hashlib.sha1(pathlib.Path(spec.requirements).read_bytes().strip()).hexdigest()
spec = dataclasses.replace(spec, requirements=requirements)
Expand Down Expand Up @@ -306,6 +314,12 @@ def with_apt_packages(self, apt_packages: Union[str, List[str]]) -> "ImageSpec":
new_image_spec = self._update_attribute("apt_packages", apt_packages)
return new_image_spec

def with_copy(self, src: Union[str, List[str]]) -> "ImageSpec":
"""
Builder that returns a new image spec with the source files copied to the destination directory.
"""
return self._update_attribute("copy", src)

def force_push(self) -> "ImageSpec":
"""
Builder that returns a new image spec with force push enabled.
Expand Down
51 changes: 33 additions & 18 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import time
import typing
from dataclasses import dataclass
from typing import Optional
from typing import List, Optional, Union

import click
from rich import print as rich_print
Expand Down Expand Up @@ -170,30 +170,45 @@ def fast_package(
return archive_fname


def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> str:
def compute_digest(source: Union[os.PathLike, List[os.PathLike]], filter: Optional[callable] = None) -> str:
"""
Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents.
:param os.PathLike source:
:param callable filter:
:return Text:
"""
hasher = hashlib.md5()
for root, _, files in os.walk(source, topdown=True):
files.sort()

for fname in files:
abspath = os.path.join(root, fname)
# Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
if not os.path.exists(abspath):
logger.info(f"Skipping non-existent file {abspath}")
continue
relpath = os.path.relpath(abspath, source)
if filter:
if filter(relpath):
continue

_filehash_update(abspath, hasher)
_pathhash_update(relpath, hasher)

def compute_digest_for_file(path: os.PathLike, rel_path: os.PathLike) -> None:
# Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
if not os.path.exists(path):
logger.info(f"Skipping non-existent file {path}")
return

if filter:
if filter(rel_path):
return

_filehash_update(path, hasher)
_pathhash_update(rel_path, hasher)

def compute_digest_for_dir(source: os.PathLike) -> None:
for root, _, files in os.walk(source, topdown=True):
files.sort()

for fname in files:
abspath = os.path.join(root, fname)
relpath = os.path.relpath(abspath, source)
compute_digest_for_file(abspath, relpath)

if isinstance(source, list):
for src in source:
if os.path.isdir(src):
compute_digest_for_dir(src)
else:
compute_digest_for_file(src, os.path.basename(src))
else:
compute_digest_for_dir(source)

return hasher.hexdigest()

Expand Down
40 changes: 24 additions & 16 deletions tests/flytekit/unit/core/image_spec/test_default_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from flytekit.image_spec import ImageSpec
from flytekit.image_spec.default_builder import DefaultImageBuilder, create_docker_context
from flytekit.constants import CopyFileDetection

from pathlib import Path
import tempfile

def test_create_docker_context(tmp_path):
docker_context_path = tmp_path / "builder_root"
Expand All @@ -21,22 +22,27 @@ def test_create_docker_context(tmp_path):
other_requirements_path = tmp_path / "requirements.txt"
other_requirements_path.write_text("threadpoolctl\n")

image_spec = ImageSpec(
name="FLYTEKIT",
python_version="3.12",
env={"MY_ENV": "MY_VALUE"},
apt_packages=["curl"],
conda_packages=["scipy==1.13.0", "numpy"],
packages=["pandas==2.2.1"],
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
entrypoint=["/bin/bash"],
pip_extra_index_url=["https://extra-url.com"],
source_copy_mode=CopyFileDetection.ALL,
)
with tempfile.TemporaryDirectory(dir=Path.cwd().as_posix()) as tmp_dir:
tmp_file = Path(tmp_dir) / "copy_file.txt"
tmp_file.write_text("copy_file_content")

image_spec = ImageSpec(
name="FLYTEKIT",
python_version="3.12",
env={"MY_ENV": "MY_VALUE"},
apt_packages=["curl"],
conda_packages=["scipy==1.13.0", "numpy"],
packages=["pandas==2.2.1"],
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
entrypoint=["/bin/bash"],
pip_extra_index_url=["https://extra-url.com"],
source_copy_mode=CopyFileDetection.ALL,
copy=[tmp_file.relative_to(Path.cwd()).as_posix()],
)

create_docker_context(image_spec, docker_context_path)
create_docker_context(image_spec, docker_context_path)

dockerfile_path = docker_context_path / "Dockerfile"
assert dockerfile_path.exists()
Expand All @@ -51,6 +57,7 @@ def test_create_docker_context(tmp_path):
assert "RUN mkdir my_dir" in dockerfile_content
assert "ENTRYPOINT [\"/bin/bash\"]" in dockerfile_content
assert "mkdir -p $HOME" in dockerfile_content
assert f"COPY --chown=flytekit {tmp_file.relative_to(Path.cwd()).as_posix()} /root/" in dockerfile_content

requirements_path = docker_context_path / "requirements_uv.txt"
assert requirements_path.exists()
Expand Down Expand Up @@ -179,6 +186,7 @@ def test_build(tmp_path):
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
copy=[f"{tmp_path}/hello_world.txt", f"{tmp_path}/requirements.txt"]
)

builder = DefaultImageBuilder()
Expand Down
5 changes: 4 additions & 1 deletion tests/flytekit/unit/core/image_spec/test_image_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ def test_image_spec(mock_image_spec_builder, monkeypatch):
requirements=REQUIREMENT_FILE,
registry_config=REGISTRY_CONFIG_FILE,
entrypoint=["/bin/bash"],
copy=["/src/file1.txt"]
)
assert image_spec._is_force_push is False

image_spec = image_spec.with_commands("echo hello")
image_spec = image_spec.with_packages("numpy")
image_spec = image_spec.with_apt_packages("wget")
image_spec = image_spec.with_copy(["/src", "/src/file2.txt"])
image_spec = image_spec.force_push()

assert image_spec.python_version == "3.8"
Expand All @@ -58,8 +60,9 @@ def test_image_spec(mock_image_spec_builder, monkeypatch):
assert image_spec.commands == ["echo hello"]
assert image_spec._is_force_push is True
assert image_spec.entrypoint == ["/bin/bash"]
assert image_spec.copy == ["/src/file1.txt", "/src", "/src/file2.txt"]

assert image_spec.image_name() == f"localhost:30001/flytekit:nDg0IzEKso7jtbBnpLWTnw"
assert image_spec.image_name() == f"localhost:30001/flytekit:fYU5EUF6y0b2oFG4tu70tA"
ctx = context_manager.FlyteContext.current_context()
with context_manager.FlyteContextManager.with_context(
ctx.with_execution_state(ctx.execution_state.with_params(mode=ExecutionState.Mode.TASK_EXECUTION))
Expand Down

0 comments on commit 4e1ea68

Please sign in to comment.