Skip to content

Commit

Permalink
Ray projects schema and validation (ray-project#5329)
Browse files Browse the repository at this point in the history
  • Loading branch information
pcmoritz authored Aug 6, 2019
1 parent 3ad2fe7 commit e8d9cfc
Show file tree
Hide file tree
Showing 19 changed files with 318 additions and 1 deletion.
2 changes: 2 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,8 @@ filegroup(
"python/ray/dashboard/res/main.js",
"python/ray/experimental/*.py",
"python/ray/internal/*.py",
"python/ray/projects/*.py",
"python/ray/projects/schema.json",
"python/ray/workers/default_worker.py",
]),
)
Expand Down
1 change: 1 addition & 0 deletions doc/requirements-doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ click
filelock
flatbuffers
funcsigs
jsonschema
mock
numpy
opencv-python-headless
Expand Down
2 changes: 2 additions & 0 deletions python/ray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
wait,
) # noqa: E402
import ray.internal # noqa: E402
import ray.projects # noqa: E402
# We import ray.actor because some code is run in actor.py which initializes
# some functions in the worker.
import ray.actor # noqa: F401
Expand Down Expand Up @@ -135,6 +136,7 @@
"is_initialized",
"method",
"profile",
"projects",
"put",
"register_custom_serializer",
"remote",
Expand Down
11 changes: 11 additions & 0 deletions python/ray/projects/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from ray.projects.projects import (check_project_definition, find_root,
load_project, validate_project_schema)

# Public API of ray.projects; these are the names imported above from
# ray.projects.projects.
__all__ = [
    "check_project_definition", "find_root", "load_project",
    "validate_project_schema"
]
117 changes: 117 additions & 0 deletions python/ray/projects/projects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import json
import jsonschema
import os
import yaml


def find_root(directory):
    """Locate the root directory of the enclosing ray project.

    Starting at ``directory`` (made absolute first), walk upwards one parent
    at a time until a directory containing a ``.rayproject`` sub-directory
    is found or the top of the filesystem has been checked.

    Args:
        directory (str): Directory to start the search in.

    Returns:
        Absolute path of the closest directory (``directory`` itself or one
        of its ancestors) containing a ``.rayproject`` directory, or None
        if no such project is found.
    """
    current = os.path.abspath(directory)
    while True:
        if os.path.isdir(os.path.join(current, ".rayproject")):
            return current
        parent = os.path.abspath(os.path.join(current, os.pardir))
        if parent == current:
            # Going up no longer changes the path, i.e. the top of the
            # filesystem was just checked; there is nothing left to search.
            return None
        current = parent


def validate_project_schema(project_definition):
    """Check a parsed project file against the official ray project schema.

    The schema is read from the ``schema.json`` file located in the same
    directory as this module.

    Args:
        project_definition (dict): Parsed project yaml.

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
    """
    schema_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "schema.json")
    with open(schema_path) as schema_file:
        schema = json.load(schema_file)

    jsonschema.validate(instance=project_definition, schema=schema)


def check_project_definition(project_root, project_definition):
    """Verify that a project definition is valid and self-consistent.

    The definition is first validated against the project schema. After
    that, every file it refers to ('cluster', and 'requirements' /
    'dockerfile' in the environment) is resolved relative to
    ``project_root`` and must exist.

    Args:
        project_root (str): Path containing the .rayproject
        project_definition (dict): Project definition

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
        ValueError: This exception is raised if there are other errors in
            the project definition (e.g. files not existing).
    """
    validate_project_schema(project_definition)

    # The cluster yaml file has to be present below the project root.
    if "cluster" in project_definition:
        cluster_path = os.path.join(project_root,
                                    project_definition["cluster"])
        if not os.path.exists(cluster_path):
            raise ValueError("'cluster' file does not exist "
                             "in {}".format(project_root))

    # Everything below only concerns the optional environment section.
    if "environment" not in project_definition:
        return

    environment = project_definition["environment"]

    # A docker environment is given either as a Dockerfile or as an image,
    # never as both.
    if "dockerfile" in environment and "dockerimage" in environment:
        raise ValueError("Cannot specify both 'dockerfile' and "
                         "'dockerimage' in environment.")

    if "requirements" in environment:
        requirements_path = os.path.join(project_root,
                                         environment["requirements"])
        if not os.path.exists(requirements_path):
            raise ValueError("'requirements' file in 'environment' does "
                             "not exist in {}".format(project_root))

    if "dockerfile" in environment:
        dockerfile_path = os.path.join(project_root,
                                       environment["dockerfile"])
        if not os.path.exists(dockerfile_path):
            raise ValueError("'dockerfile' file in 'environment' does "
                             "not exist in {}".format(project_root))


def load_project(current_dir):
    """Find the .rayproject folder of the current project, parse and validate it.

    The project root is searched for starting at ``current_dir`` and going
    up through its parent directories. The project definition is then read
    from ``.rayproject/project.yaml`` below that root and checked with
    ``check_project_definition``.

    Args:
        current_dir (str): Path from which to search for .rayproject.

    Returns:
        Dictionary containing the project definition.

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
        ValueError: This exception is raised if no project root or project
            file is found, or if there are other errors in the project
            definition (e.g. files not existing).
    """
    project_root = find_root(current_dir)

    if not project_root:
        raise ValueError("No project root found")

    project_file = os.path.join(project_root, ".rayproject", "project.yaml")

    if not os.path.exists(project_file):
        raise ValueError("Project file {} not found".format(project_file))

    with open(project_file) as f:
        # Use safe_load instead of a bare yaml.load(f): calling yaml.load
        # without an explicit Loader can construct arbitrary Python objects
        # from the file, is deprecated in PyYAML and is no longer accepted
        # by recent PyYAML releases. A project definition only consists of
        # plain mappings, lists and strings, which safe_load fully supports.
        project_definition = yaml.safe_load(f)

    check_project_definition(project_root, project_definition)

    return project_definition
64 changes: 64 additions & 0 deletions python/ray/projects/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{
"type": "object",
"properties": {
"name": {
"description": "The name of the project",
"type": "string"
},
"description": {
"description": "A short description of the project",
"type": "string"
},
"repo": {
"description": "The URL of the repo this project is part of",
"type": "string"
},
"cluster": {
"description": "Path to a .yaml cluster configuration file (relative to the project root)",
"type": "string"
},
"environment": {
"description": "The environment that needs to be set up to run the project",
"type": "object",
"properties": {
"dockerimage": {
"description": "URL to a docker image that can be pulled to run the project in",
"type": "string"
},
"dockerfile": {
"description": "Path to a Dockerfile to set up an image the project can run in (relative to the project root)",
"type": "string"
},
"requirements": {
"description": "Path to a Python requirements.txt file to set up project dependencies (relative to the project root)",
"type": "string"
},
"shell": {
"description": "A sequence of shell commands to run to set up the project environment",
"type": "array",
"items": {
"type": "string"
}
}
}
},
"commands": {
"type": "array",
"items": {
"description": "Possible commands to run to start a session",
"type": "object",
"properties": {
"name": {
"description": "Name of the command",
"type": "string"
},
"command": {
"description": "Shell command to run on the cluster",
"type": "string"
}
}
}
}
},
"required": ["name", "cluster"]
}
12 changes: 12 additions & 0 deletions python/ray/scripts/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,17 @@ def get_worker_ips(cluster_config_file, cluster_name):
click.echo("\n".join(worker_ips))


@cli.command()
@click.argument("command", required=True, type=str)
@click.option(
    "--dry",
    is_flag=True,
    default=False,
    help="Print actions instead of running them.")
def session(command, dry):
    # NOTE(review): `command` and `dry` are accepted but not used in this
    # body; all it does is load (and thereby validate) the project that
    # contains the current working directory.
    working_dir = os.getcwd()
    ray.projects.load_project(working_dir)


@cli.command()
def stack():
COMMAND = """
Expand Down Expand Up @@ -791,6 +802,7 @@ def timeline(redis_address):
cli.add_command(kill_random_node)
cli.add_command(get_head_ip, name="get_head_ip")
cli.add_command(get_worker_ips)
cli.add_command(session)
cli.add_command(stack)
cli.add_command(timeline)

Expand Down
7 changes: 7 additions & 0 deletions python/ray/tests/project_files/docker_project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: testproject1
description: "Test project for docker environment"

environment:
docker: "Dockerfile"

cluster: "cluster.yaml"
Empty file.
4 changes: 4 additions & 0 deletions python/ray/tests/project_files/no_project2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: testproject2

environment:
shell: "one command"
8 changes: 8 additions & 0 deletions python/ray/tests/project_files/no_project3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
name: testproject3

environment:
dockerfile: "Dockerfile"

dockerimage: "some docker image"

cluster: "cluster.yaml"
Empty file.
10 changes: 10 additions & 0 deletions python/ray/tests/project_files/project1/.rayproject/project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: "project1"

cluster: .rayproject/cluster.yaml

environment:
requirements: requirements.txt

commands:
- name: default
command: ls
Empty file.
Empty file.
6 changes: 6 additions & 0 deletions python/ray/tests/project_files/requirements_project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: testproject2

environment:
requirements: "requirements.txt"

cluster: "cluster.yaml"
10 changes: 10 additions & 0 deletions python/ray/tests/project_files/shell_project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: testproject3
repo: "https://github.com/ray-project/ray"

environment:
shell:
- first command
- second command
- third command

cluster: "cluster.yaml"
61 changes: 61 additions & 0 deletions python/ray/tests/test_projects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import jsonschema
import os
import pytest
import subprocess
import yaml

import ray

# Absolute path of the directory containing this test module; the
# "project_files" fixtures used below are looked up relative to it.
TEST_DIR = os.path.dirname(os.path.abspath(__file__))


def load_project_description(project_file):
    """Parse a yaml fixture from the ``project_files`` test directory.

    Args:
        project_file (str): File name of the fixture, relative to the
            ``project_files`` directory next to this test module.

    Returns:
        The parsed yaml content of the fixture.
    """
    path = os.path.join(TEST_DIR, "project_files", project_file)
    with open(path) as f:
        # safe_load replaces a bare yaml.load(f), which is deprecated
        # without an explicit Loader and rejected by recent PyYAML releases.
        return yaml.safe_load(f)


def test_validation_success():
    """Project files expected to conform to the schema validate cleanly."""
    valid_fixtures = ("docker_project.yaml", "requirements_project.yaml",
                      "shell_project.yaml")
    for fixture_name in valid_fixtures:
        definition = load_project_description(fixture_name)
        # Must not raise.
        ray.projects.validate_project_schema(definition)


def test_validation_failure():
    """Project files violating the schema raise a ValidationError."""
    invalid_fixtures = ("no_project1.yaml", "no_project2.yaml")
    for fixture_name in invalid_fixtures:
        definition = load_project_description(fixture_name)
        with pytest.raises(jsonschema.exceptions.ValidationError):
            ray.projects.validate_project_schema(definition)


def test_check_failure():
    """A definition that passes the schema can still be rejected."""
    # NOTE(review): with project_root="" the 'cluster' path is resolved
    # relative to the current working directory, so the ValueError seen
    # here may come from the missing cluster file rather than from the
    # dockerfile/dockerimage conflict -- confirm which check is intended.
    definition = load_project_description("no_project3.yaml")
    with pytest.raises(ValueError):
        ray.projects.check_project_definition("", definition)


def test_project_root():
    """find_root resolves the project root from the root and from below it."""
    project_dir = os.path.join(TEST_DIR, "project_files", "project1")
    # The root itself is its own project root.
    assert ray.projects.find_root(project_dir) == project_dir

    # A directory below the root resolves to the same root.
    nested_dir = os.path.join(project_dir, "subdir")
    assert ray.projects.find_root(nested_dir) == project_dir

    # A path that is not expected to be inside any project has no root.
    assert ray.projects.find_root("/tmp/") is None


def test_project_validation():
    """`ray session create --dry` exits successfully inside project1."""
    project_dir = os.path.join(TEST_DIR, "project_files", "project1")
    cli_call = ["ray", "session", "create", "--dry"]
    # check_call raises CalledProcessError on a non-zero exit status.
    subprocess.check_call(cli_call, cwd=project_dir)
4 changes: 3 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"ray/core/src/plasma/plasma_store_server", "ray/_raylet.so",
"ray/core/src/ray/raylet/raylet_monitor", "ray/core/src/ray/raylet/raylet",
"ray/dashboard/dashboard.py", "ray/dashboard/index.html",
"ray/dashboard/res/main.css", "ray/dashboard/res/main.js"
"ray/dashboard/res/main.css", "ray/dashboard/res/main.js",
"ray/projects/schema.json"
]

# These are the directories where automatically generated Python protobuf
Expand Down Expand Up @@ -138,6 +139,7 @@ def find_version(*filepath):
requires = [
"numpy >= 1.14",
"filelock",
"jsonschema",
"funcsigs",
"click",
"colorama",
Expand Down

0 comments on commit e8d9cfc

Please sign in to comment.