Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/cluster/launcher-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ You can also run ``ray up`` to restart a cluster if it seems to be in a bad stat

If you don't want the update to restart services (e.g., because the changes don't require a restart), pass ``--no-restart`` to the update call.

If you want to force re-generation of the config to pick up possible changes in the cloud environment, pass ``--no-config-cache`` to the update call. With this flag, the locally cached cluster config is neither read nor written.

See :ref:`the documentation <ray-up-doc>` for ``ray up``.

.. code-block:: shell
Expand Down
13 changes: 7 additions & 6 deletions python/ray/autoscaler/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def request_resources(num_cpus=None, bundles=None):

def create_or_update_cluster(config_file, override_min_workers,
override_max_workers, no_restart, restart_only,
yes, override_cluster_name):
yes, override_cluster_name, no_config_cache):
"""Create or updates an autoscaling Ray cluster from a config json."""
config = yaml.safe_load(open(config_file).read())
if override_min_workers is not None:
Expand All @@ -94,19 +94,19 @@ def create_or_update_cluster(config_file, override_min_workers,
config["max_workers"] = override_max_workers
if override_cluster_name is not None:
config["cluster_name"] = override_cluster_name
config = _bootstrap_config(config)
config = _bootstrap_config(config, no_config_cache)
get_or_create_head_node(config, config_file, no_restart, restart_only, yes,
override_cluster_name)


def _bootstrap_config(config):
def _bootstrap_config(config, no_config_cache=False):
config = prepare_config(config)

hasher = hashlib.sha1()
hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
cache_key = os.path.join(tempfile.gettempdir(),
"ray-config-{}".format(hasher.hexdigest()))
if os.path.exists(cache_key):
if os.path.exists(cache_key) and not no_config_cache:
logger.info("Using cached config at {}".format(cache_key))
return json.loads(open(cache_key).read())
validate_config(config)
Expand All @@ -118,8 +118,9 @@ def _bootstrap_config(config):

bootstrap_config, _ = importer()
resolved_config = bootstrap_config(config)
with open(cache_key, "w") as f:
f.write(json.dumps(resolved_config))
if not no_config_cache:
with open(cache_key, "w") as f:
f.write(json.dumps(resolved_config))
return resolved_config


Expand Down
12 changes: 9 additions & 3 deletions python/ray/projects/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def __init__(self, session_name=None):
raise click.ClickException(
"Docker support in session is currently not implemented.")

def create_cluster(self):
def create_cluster(self, no_config_cache):
"""Create a cluster that will run the session."""
create_or_update_cluster(
config_file=self.project_definition.cluster_yaml(),
Expand All @@ -178,6 +178,7 @@ def create_cluster(self):
restart_only=False,
yes=True,
override_cluster_name=self.session_name,
no_config_cache=no_config_cache,
)

def sync_files(self):
Expand Down Expand Up @@ -351,7 +352,12 @@ def stop(name):
"the command in the project config"),
is_flag=True)
@click.option("--name", help="A name to tag the session with.", default=None)
def session_start(command, args, shell, name):
@click.option(
"--no-config-cache",
is_flag=True,
default=False,
help="Disable the local cluster config cache.")
def session_start(command, args, shell, name, no_config_cache):
project_definition = load_project_or_throw()

if not name:
Expand All @@ -375,7 +381,7 @@ def session_start(command, args, shell, name):
for run in session_runs:
runner = SessionRunner(session_name=run["name"])
logger.info("[1/{}] Creating cluster".format(run["num_steps"]))
runner.create_cluster()
runner.create_cluster(no_config_cache)
logger.info("[2/{}] Syncing the project".format(run["num_steps"]))
runner.sync_files()
logger.info("[3/{}] Setting up environment".format(run["num_steps"]))
Expand Down
12 changes: 9 additions & 3 deletions python/ray/scripts/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,11 @@ def stop(force, verbose):
default=False,
help=("Whether to skip running setup commands and only restart Ray. "
"This cannot be used with 'no-restart'."))
@click.option(
"--no-config-cache",
is_flag=True,
default=False,
help="Disable the local cluster config cache.")
@click.option(
"--min-workers",
required=False,
Expand All @@ -657,7 +662,7 @@ def stop(force, verbose):
default=False,
help="Don't ask for confirmation.")
def up(cluster_config_file, min_workers, max_workers, no_restart, restart_only,
yes, cluster_name):
yes, cluster_name, no_config_cache):
"""Create or update a Ray cluster."""
if restart_only or no_restart:
assert restart_only != no_restart, "Cannot set both 'restart_only' " \
Expand All @@ -673,7 +678,8 @@ def up(cluster_config_file, min_workers, max_workers, no_restart, restart_only,
except urllib.error.HTTPError as e:
logger.info("Error downloading file: ", e)
create_or_update_cluster(cluster_config_file, min_workers, max_workers,
no_restart, restart_only, yes, cluster_name)
no_restart, restart_only, yes, cluster_name,
no_config_cache)


@cli.command()
Expand Down Expand Up @@ -889,7 +895,7 @@ def submit(cluster_config_file, screen, tmux, stop, start, cluster_name,

if start:
create_or_update_cluster(cluster_config_file, None, None, False, False,
True, cluster_name)
True, cluster_name, False)
target = os.path.basename(script)
target = os.path.join("~", target)
rsync(cluster_config_file, script, target, cluster_name, down=False)
Expand Down