Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Flytekit] Support extra copy commands in ImageSpec #2715

Merged
merged 30 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2f0d516
feat: support building images with extra copy commands
mao3267 Aug 27, 2024
f675359
feat: add `copy` function in ImageSpec
mao3267 Aug 27, 2024
4808d32
test: build image with extra copy cmds
mao3267 Aug 27, 2024
53b859c
test: ImageSpec with extra copy cmds
mao3267 Aug 27, 2024
4f9dafe
fix: f-string syntax error in copy command
mao3267 Aug 27, 2024
76520a0
fix: rename copy function and related arguments
mao3267 Sep 4, 2024
9a9707d
Merge branch 'master' of https://github.com/mao3267/flytekit into add…
mao3267 Sep 4, 2024
e390c51
fix: update image name hash
mao3267 Sep 4, 2024
d1adcee
Merge branch 'master' of https://github.com/mao3267/flytekit into add…
mao3267 Sep 12, 2024
a8bbb1a
fix: update digest with copy files
mao3267 Sep 12, 2024
e2ec68f
feat: support computing digest from list of files
mao3267 Sep 12, 2024
02611de
fix: update image tag
mao3267 Sep 12, 2024
948447e
Merge branch 'master' of https://github.com/mao3267/flytekit into add…
mao3267 Sep 12, 2024
a619773
fix: remove ignore logic
mao3267 Sep 13, 2024
e8545f3
fix: lint
mao3267 Sep 13, 2024
1c46800
fix: add source_copy_node
mao3267 Sep 14, 2024
eca2063
fix: copy local files to tmp_dir for docker copy
mao3267 Sep 17, 2024
dea9233
test: update default builder test and files for copy
mao3267 Sep 17, 2024
954b27b
fix: f-string related error
mao3267 Sep 17, 2024
4d5215c
fix: lint
mao3267 Sep 17, 2024
8f0e3af
feat: support list of strings and strings in `with_copy`
mao3267 Sep 18, 2024
3b4070d
fix: change ImageSpec.copy to list[str]
mao3267 Sep 18, 2024
b2f5659
Merge branch 'master' of https://github.com/mao3267/flytekit into add…
mao3267 Sep 20, 2024
b7e37dd
fix: remain source tree structure
mao3267 Sep 21, 2024
53ddf88
fix: path separator diff on windows
mao3267 Sep 21, 2024
e9b1c96
fix: use update_attribute instead
mao3267 Sep 25, 2024
ff7699f
rm: remove test files
mao3267 Sep 25, 2024
b1ee46a
fix: test with tmp dirs/files
mao3267 Sep 25, 2024
4e347b9
fix: path separator error
mao3267 Sep 25, 2024
a15940d
nit
pingsutw Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion flytekit/image_spec/default_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@

$COPY_COMMAND_RUNTIME
RUN $RUN_COMMANDS
$EXTRA_COPY_CMDS

WORKDIR /root
SHELL ["/bin/bash", "-c"]
Expand Down Expand Up @@ -221,6 +222,28 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path):
else:
run_commands = ""

if image_spec.copy:
copy_commands = []
for src in image_spec.copy:
src_path = Path(src)

if src_path.is_absolute() or ".." in src_path.parts:
raise ValueError("Absolute paths or paths with '..' are not allowed in COPY command.")

dst_path = tmp_dir / src_path
dst_path.parent.mkdir(parents=True, exist_ok=True)

if src_path.is_dir():
shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.as_posix()}/")
else:
shutil.copy(src_path, dst_path)
copy_commands.append(f"COPY --chown=flytekit {src_path.as_posix()} /root/{src_path.parent.as_posix()}/")

extra_copy_cmds = "\n".join(copy_commands)
else:
extra_copy_cmds = ""

docker_content = DOCKER_FILE_TEMPLATE.substitute(
PYTHON_VERSION=python_version,
UV_PYTHON_INSTALL_COMMAND=uv_python_install_command,
Expand All @@ -232,6 +255,7 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path):
COPY_COMMAND_RUNTIME=copy_command_runtime,
ENTRYPOINT=entrypoint,
RUN_COMMANDS=run_commands,
EXTRA_COPY_CMDS=extra_copy_cmds,
)

dockerfile_path = tmp_dir / "Dockerfile"
Expand All @@ -247,7 +271,7 @@ class DefaultImageBuilder(ImageSpecBuilder):
"python_version",
"builder",
"source_root",
"copy",
"source_copy_mode",
"env",
"registry",
"packages",
Expand All @@ -263,6 +287,7 @@ class DefaultImageBuilder(ImageSpecBuilder):
"pip_extra_index_url",
# "registry_config",
"commands",
"copy",
}

def build_image(self, image_spec: ImageSpec) -> str:
Expand Down
14 changes: 14 additions & 0 deletions flytekit/image_spec/image_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class ImageSpec:
Python files into the image.

If the option is set by the user, then that option is of course used.
copy: List of files/directories to copy to /root. e.g. ["src/file1.txt", "src/file2.txt"]
"""

name: str = "flytekit"
Expand All @@ -84,6 +85,7 @@ class ImageSpec:
commands: Optional[List[str]] = None
tag_format: Optional[str] = None
source_copy_mode: Optional[CopyFileDetection] = None
copy: Optional[List[str]] = None

def __post_init__(self):
self.name = self.name.lower()
Expand Down Expand Up @@ -179,6 +181,12 @@ def tag(self) -> str:
# shortcut to represent all the files.
spec = dataclasses.replace(spec, source_root=ls_digest)

if self.copy:
from flytekit.tools.fast_registration import compute_digest

digest = compute_digest(self.copy, None)
spec = dataclasses.replace(spec, copy=digest)

if spec.requirements:
requirements = hashlib.sha1(pathlib.Path(spec.requirements).read_bytes().strip()).hexdigest()
spec = dataclasses.replace(spec, requirements=requirements)
Expand Down Expand Up @@ -306,6 +314,12 @@ def with_apt_packages(self, apt_packages: Union[str, List[str]]) -> "ImageSpec":
new_image_spec = self._update_attribute("apt_packages", apt_packages)
return new_image_spec

def with_copy(self, src: Union[str, List[str]]) -> "ImageSpec":
"""
Builder that returns a new image spec with the source files copied to the destination directory.
"""
return self._update_attribute("copy", src)

def force_push(self) -> "ImageSpec":
"""
Builder that returns a new image spec with force push enabled.
Expand Down
51 changes: 33 additions & 18 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import time
import typing
from dataclasses import dataclass
from typing import Optional
from typing import List, Optional, Union

import click
from rich import print as rich_print
Expand Down Expand Up @@ -170,30 +170,45 @@ def fast_package(
return archive_fname


def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> str:
def compute_digest(source: Union[os.PathLike, List[os.PathLike]], filter: Optional[callable] = None) -> str:
"""
Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents.
:param os.PathLike source:
:param callable filter:
:return Text:
"""
hasher = hashlib.md5()
for root, _, files in os.walk(source, topdown=True):
files.sort()

for fname in files:
abspath = os.path.join(root, fname)
# Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
if not os.path.exists(abspath):
logger.info(f"Skipping non-existent file {abspath}")
continue
relpath = os.path.relpath(abspath, source)
if filter:
if filter(relpath):
continue

_filehash_update(abspath, hasher)
_pathhash_update(relpath, hasher)

def compute_digest_for_file(path: os.PathLike, rel_path: os.PathLike) -> None:
# Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
if not os.path.exists(path):
logger.info(f"Skipping non-existent file {path}")
return

if filter:
if filter(rel_path):
return

_filehash_update(path, hasher)
_pathhash_update(rel_path, hasher)

def compute_digest_for_dir(source: os.PathLike) -> None:
for root, _, files in os.walk(source, topdown=True):
files.sort()

for fname in files:
abspath = os.path.join(root, fname)
relpath = os.path.relpath(abspath, source)
compute_digest_for_file(abspath, relpath)

if isinstance(source, list):
for src in source:
if os.path.isdir(src):
compute_digest_for_dir(src)
else:
compute_digest_for_file(src, os.path.basename(src))
else:
compute_digest_for_dir(source)

return hasher.hexdigest()

Expand Down
40 changes: 24 additions & 16 deletions tests/flytekit/unit/core/image_spec/test_default_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from flytekit.image_spec import ImageSpec
from flytekit.image_spec.default_builder import DefaultImageBuilder, create_docker_context
from flytekit.constants import CopyFileDetection

from pathlib import Path
import tempfile

def test_create_docker_context(tmp_path):
docker_context_path = tmp_path / "builder_root"
Expand All @@ -21,22 +22,27 @@ def test_create_docker_context(tmp_path):
other_requirements_path = tmp_path / "requirements.txt"
other_requirements_path.write_text("threadpoolctl\n")

image_spec = ImageSpec(
name="FLYTEKIT",
python_version="3.12",
env={"MY_ENV": "MY_VALUE"},
apt_packages=["curl"],
conda_packages=["scipy==1.13.0", "numpy"],
packages=["pandas==2.2.1"],
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
entrypoint=["/bin/bash"],
pip_extra_index_url=["https://extra-url.com"],
source_copy_mode=CopyFileDetection.ALL,
)
with tempfile.TemporaryDirectory(dir=Path.cwd().as_posix()) as tmp_dir:
tmp_file = Path(tmp_dir) / "copy_file.txt"
tmp_file.write_text("copy_file_content")

image_spec = ImageSpec(
name="FLYTEKIT",
python_version="3.12",
env={"MY_ENV": "MY_VALUE"},
apt_packages=["curl"],
conda_packages=["scipy==1.13.0", "numpy"],
packages=["pandas==2.2.1"],
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
entrypoint=["/bin/bash"],
pip_extra_index_url=["https://extra-url.com"],
source_copy_mode=CopyFileDetection.ALL,
copy=[tmp_file.relative_to(Path.cwd()).as_posix()],
)

create_docker_context(image_spec, docker_context_path)
create_docker_context(image_spec, docker_context_path)

dockerfile_path = docker_context_path / "Dockerfile"
assert dockerfile_path.exists()
Expand All @@ -51,6 +57,7 @@ def test_create_docker_context(tmp_path):
assert "RUN mkdir my_dir" in dockerfile_content
assert "ENTRYPOINT [\"/bin/bash\"]" in dockerfile_content
assert "mkdir -p $HOME" in dockerfile_content
assert f"COPY --chown=flytekit {tmp_file.relative_to(Path.cwd()).as_posix()} /root/" in dockerfile_content

requirements_path = docker_context_path / "requirements_uv.txt"
assert requirements_path.exists()
Expand Down Expand Up @@ -179,6 +186,7 @@ def test_build(tmp_path):
requirements=os.fspath(other_requirements_path),
source_root=os.fspath(source_root),
commands=["mkdir my_dir"],
copy=[f"{tmp_path}/hello_world.txt", f"{tmp_path}/requirements.txt"]
)

builder = DefaultImageBuilder()
Expand Down
5 changes: 4 additions & 1 deletion tests/flytekit/unit/core/image_spec/test_image_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ def test_image_spec(mock_image_spec_builder, monkeypatch):
requirements=REQUIREMENT_FILE,
registry_config=REGISTRY_CONFIG_FILE,
entrypoint=["/bin/bash"],
copy=["/src/file1.txt"]
)
assert image_spec._is_force_push is False

image_spec = image_spec.with_commands("echo hello")
image_spec = image_spec.with_packages("numpy")
image_spec = image_spec.with_apt_packages("wget")
image_spec = image_spec.with_copy(["/src", "/src/file2.txt"])
image_spec = image_spec.force_push()

assert image_spec.python_version == "3.8"
Expand All @@ -58,8 +60,9 @@ def test_image_spec(mock_image_spec_builder, monkeypatch):
assert image_spec.commands == ["echo hello"]
assert image_spec._is_force_push is True
assert image_spec.entrypoint == ["/bin/bash"]
assert image_spec.copy == ["/src/file1.txt", "/src", "/src/file2.txt"]

assert image_spec.image_name() == f"localhost:30001/flytekit:nDg0IzEKso7jtbBnpLWTnw"
assert image_spec.image_name() == f"localhost:30001/flytekit:fYU5EUF6y0b2oFG4tu70tA"
ctx = context_manager.FlyteContext.current_context()
with context_manager.FlyteContextManager.with_context(
ctx.with_execution_state(ctx.execution_state.with_params(mode=ExecutionState.Mode.TASK_EXECUTION))
Expand Down
Loading