Skip to content

Commit

Permalink
Kubernetes projects: Change image-uri attribute to repository-uri (mlflow#1574)
Browse files Browse the repository at this point in the history

* Image name > repo uri

* Tests fix

* Lint

* Fix tests

* Test fix
  • Loading branch information
dbczumar authored Jul 12, 2019
1 parent d15a33e commit 488dfaf
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 39 deletions.
6 changes: 3 additions & 3 deletions docs/source/projects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,13 @@ In project folder you need to create a ``backend_config.json`` with the followin
{
"kube-context": "docker-for-desktop",

"image-uri": "username/mlflow-kubernetes-example",
"repository-uri": "username/mlflow-kubernetes-example",

"kube-job-template-path": "kubernetes_job_template.yaml"
}

The ``kube-context`` attribute is the kubernetes context where mlflow will run the Job. ``image-uri`` points to the
registry/repository/image where the image will be pushed so kubernetes can download it and run. Remeber that mlflow
The ``kube-context`` attribute is the kubernetes context where mlflow will run the Job. ``repository-uri`` points to the
repository where the image will be pushed so kubernetes can download it and run. Remember that mlflow
expects that login credentials are already stored for both kubernetes context and docker repository to push images.

The ``kube-job-template-path`` points to a yaml file with the kubernetes Job/Batch specification to run the training on
Expand Down
4 changes: 2 additions & 2 deletions examples/docker/kubernetes_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"kube-context": "docker-for-desktop",
"kube-job-template-path": "examples/docker/kubernetes_job_template.yaml",
"image-uri": "username/mlflow-kubernetes-example"
}
"repository-uri": "username/mlflow-kubernetes-example"
}
41 changes: 23 additions & 18 deletions mlflow/projects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import mlflow.projects.databricks
from mlflow.utils import process
from mlflow.utils.file_utils import path_to_local_sqlite_uri, path_to_local_file_uri
from mlflow.utils.mlflow_tags import MLFLOW_PROJECT_ENV, MLFLOW_DOCKER_IMAGE_NAME, \
from mlflow.utils.mlflow_tags import MLFLOW_PROJECT_ENV, MLFLOW_DOCKER_IMAGE_URI, \
MLFLOW_DOCKER_IMAGE_ID, MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE, \
MLFLOW_GIT_COMMIT, MLFLOW_GIT_REPO_URL, MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_REPO_URL, \
LEGACY_MLFLOW_GIT_BRANCH_NAME, MLFLOW_PROJECT_ENTRY_POINT, MLFLOW_PARENT_RUN_ID, \
Expand Down Expand Up @@ -131,7 +131,7 @@ def _run(uri, experiment_id, entry_point="main", version=None, parameters=None,
_validate_docker_env(project)
_validate_docker_installation()
image = _build_docker_image(work_dir=work_dir,
image_uri=project.name,
repository_uri=project.name,
base_image=project.docker_env.get('image'),
run_id=active_run.info.run_id)
command += _get_docker_command(image=image, active_run=active_run)
Expand Down Expand Up @@ -165,7 +165,7 @@ def _run(uri, experiment_id, entry_point="main", version=None, parameters=None,
_validate_docker_installation()
kube_config = _parse_kubernetes_config(backend_config)
image = _build_docker_image(work_dir=work_dir,
image_uri=kube_config["image-uri"],
repository_uri=kube_config["repository-uri"],
base_image=project.docker_env.get('image'),
run_id=active_run.info.run_id)
image_digest = kb.push_image_to_registry(image.tags[0])
Expand Down Expand Up @@ -752,8 +752,8 @@ def _parse_kubernetes_config(backend_config):
kube_job_template))
if 'kube-context' not in backend_config.keys():
raise ExecutionException("Could not find kube-context in backend_config.")
if 'image-uri' not in backend_config.keys():
raise ExecutionException("Could not find 'image-uri' in backend_config.")
if 'repository-uri' not in backend_config.keys():
raise ExecutionException("Could not find 'repository-uri' in backend_config.")
return kube_config


Expand All @@ -776,46 +776,51 @@ def _create_docker_build_ctx(work_dir, dockerfile_contents):
return result_path


def _build_docker_image(work_dir, image_uri, base_image, run_id):
def _build_docker_image(work_dir, repository_uri, base_image, run_id):
"""
Build a docker image containing the project in `work_dir`, using the base image.
"""
tag_name = _get_docker_tag_name(image_uri, work_dir)
image_uri = _get_docker_image_uri(repository_uri=repository_uri, work_dir=work_dir)
dockerfile = (
"FROM {imagename}\n"
"LABEL Name={tag_name}\n"
"COPY {build_context_path}/ /mlflow/projects/code/\n"
"WORKDIR /mlflow/projects/code/\n"
).format(imagename=base_image, tag_name=tag_name,
build_context_path=_PROJECT_TAR_ARCHIVE_NAME)
).format(imagename=base_image, build_context_path=_PROJECT_TAR_ARCHIVE_NAME)
build_ctx_path = _create_docker_build_ctx(work_dir, dockerfile)
with open(build_ctx_path, 'rb') as docker_build_ctx:
_logger.info("=== Building docker image %s ===", tag_name)
_logger.info("=== Building docker image %s ===", image_uri)
client = docker.from_env()
image, _ = client.images.build(
tag=tag_name, forcerm=True,
tag=image_uri, forcerm=True,
dockerfile=posixpath.join(_PROJECT_TAR_ARCHIVE_NAME, _GENERATED_DOCKERFILE_NAME),
fileobj=docker_build_ctx, custom_context=True, encoding="gzip")
try:
os.remove(build_ctx_path)
except Exception: # pylint: disable=broad-except
_logger.info("Temporary docker context file %s was not deleted.", build_ctx_path)
tracking.MlflowClient().set_tag(run_id,
MLFLOW_DOCKER_IMAGE_NAME,
tag_name)
MLFLOW_DOCKER_IMAGE_URI,
image_uri)
tracking.MlflowClient().set_tag(run_id,
MLFLOW_DOCKER_IMAGE_ID,
image.id)
return image


def _get_docker_tag_name(imagename, work_dir):
"""Returns an appropriate Docker tag for a project based on name and git hash."""
imagename = imagename if imagename else "docker-project"
def _get_docker_image_uri(repository_uri, work_dir):
"""
Returns an appropriate Docker image URI for a project based on the git hash of the specified
working directory.
:param repository_uri: The URI of the Docker repository with which to tag the image. The
repository URI is used as the prefix of the image URI.
:param work_dir: Path to the working directory in which to search for a git commit hash
"""
repository_uri = repository_uri if repository_uri else "docker-project"
# Optionally include first 7 digits of git SHA in tag name, if available.
git_commit = _get_git_commit(work_dir)
version_string = ":" + git_commit[:7] if git_commit else ""
return imagename + version_string
return repository_uri + version_string


__all__ = [
Expand Down
2 changes: 1 addition & 1 deletion mlflow/utils/mlflow_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
MLFLOW_GIT_REPO_URL = "mlflow.source.git.repoURL"
MLFLOW_PROJECT_ENV = "mlflow.project.env"
MLFLOW_PROJECT_ENTRY_POINT = "mlflow.project.entryPoint"
MLFLOW_DOCKER_IMAGE_NAME = "mlflow.docker.image.name"
MLFLOW_DOCKER_IMAGE_URI = "mlflow.docker.image.uri"
MLFLOW_DOCKER_IMAGE_ID = "mlflow.docker.image.id"

MLFLOW_DATABRICKS_NOTEBOOK_ID = "mlflow.databricks.notebookID"
Expand Down
18 changes: 9 additions & 9 deletions tests/projects/test_docker_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

import mlflow
from mlflow.entities import ViewType
from mlflow.projects import ExecutionException, _get_docker_tag_name
from mlflow.projects import ExecutionException, _get_docker_image_uri
from mlflow.store import file_store
from mlflow.utils.mlflow_tags import MLFLOW_PROJECT_ENV, MLFLOW_DOCKER_IMAGE_NAME, \
from mlflow.utils.mlflow_tags import MLFLOW_PROJECT_ENV, MLFLOW_DOCKER_IMAGE_URI, \
MLFLOW_DOCKER_IMAGE_ID

from tests.projects.utils import TEST_DOCKER_PROJECT_DIR
Expand Down Expand Up @@ -46,7 +46,7 @@ def test_docker_project_execution(
assert run.data.metrics == {"some_key": 3}
exact_expected_tags = {MLFLOW_PROJECT_ENV: "docker"}
approx_expected_tags = {
MLFLOW_DOCKER_IMAGE_NAME: "docker-example",
MLFLOW_DOCKER_IMAGE_URI: "docker-example",
MLFLOW_DOCKER_IMAGE_ID: "sha256:",
}
run_tags = run.data.tags
Expand Down Expand Up @@ -92,18 +92,18 @@ def test_docker_uri_mode_validation(tracking_uri_mock): # pylint: disable=unuse


@mock.patch('mlflow.projects._get_git_commit')
def test_docker_tag_name_with_git(get_git_commit_mock):
def test_docker_image_uri_with_git(get_git_commit_mock):
get_git_commit_mock.return_value = '1234567890'
tag_name = _get_docker_tag_name("my_project", "my_workdir")
assert tag_name == "my_project:1234567"
image_uri = _get_docker_image_uri("my_project", "my_workdir")
assert image_uri == "my_project:1234567"
get_git_commit_mock.assert_called_with('my_workdir')


@mock.patch('mlflow.projects._get_git_commit')
def test_docker_tag_name_no_git(get_git_commit_mock):
def test_docker_image_uri_no_git(get_git_commit_mock):
get_git_commit_mock.return_value = None
tag_name = _get_docker_tag_name("my_project", "my_workdir")
assert tag_name == "my_project"
image_uri = _get_docker_image_uri("my_project", "my_workdir")
assert image_uri == "my_project"
get_git_commit_mock.assert_called_with('my_workdir')


Expand Down
8 changes: 4 additions & 4 deletions tests/projects/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,21 +371,21 @@ def test_parse_kubernetes_config():
kubernetes_config = {
"kube-context": "docker-for-desktop",
"kube-job-template-path": os.path.join(work_dir, "kubernetes_job_template.yaml"),
"image-uri": "dockerhub_account/mlflow-kubernetes-example"
"repository-uri": "dockerhub_account/mlflow-kubernetes-example"
}
yaml_obj = None
with open(kubernetes_config["kube-job-template-path"], 'r') as job_template:
yaml_obj = yaml.safe_load(job_template.read())
kube_config = mlflow.projects._parse_kubernetes_config(kubernetes_config)
assert kube_config["kube-context"] == kubernetes_config["kube-context"]
assert kube_config["kube-job-template-path"] == kubernetes_config["kube-job-template-path"]
assert kube_config["image-uri"] == kubernetes_config["image-uri"]
assert kube_config["repository-uri"] == kubernetes_config["repository-uri"]
assert kube_config["kube-job-template"] == yaml_obj


def test_parse_kubernetes_config_without_context():
kubernetes_config = {
"image-uri": "dockerhub_account/mlflow-kubernetes-example",
"repository-uri": "dockerhub_account/mlflow-kubernetes-example",
"kube-job-template-path": "kubernetes_job_template.yaml"
}
with pytest.raises(ExecutionException):
Expand All @@ -404,7 +404,7 @@ def test_parse_kubernetes_config_without_image_uri():
def test_parse_kubernetes_config_invalid_template_job_file():
kubernetes_config = {
"kube-context": "docker-for-desktop",
"image-uri": "username/mlflow-kubernetes-example",
"repository-uri": "username/mlflow-kubernetes-example",
"kube-job-template-path": "file_not_found.yaml"
}
with pytest.raises(ExecutionException):
Expand Down
4 changes: 2 additions & 2 deletions tests/resources/example_docker_project/kubernetes_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"kube-context": "docker-for-desktop",
"kube-job-template-path": "examples/docker/kubernetes_job_template.yaml",
"image-uri": "username/mlflow-kubernetes-example"
}
"repository-uri": "username/mlflow-kubernetes-example"
}

0 comments on commit 488dfaf

Please sign in to comment.