Skip to content

Commit

Permalink
Remove cluster_running flag
Browse files Browse the repository at this point in the history
  • Loading branch information
judahrand committed Jul 11, 2023
1 parent 2fe1293 commit b4d298a
Showing 1 changed file with 7 additions and 10 deletions.
17 changes: 7 additions & 10 deletions databricks/sdk/mixins/compute.py
Original file line number Diff line number Diff line change
@@ -209,31 +209,28 @@ def select_node_type(self,
return nt.node_type_id
raise ValueError("cannot determine smallest node type")

-    def ensure_cluster_is_running(self, cluster_id: str):
+    def ensure_cluster_is_running(self, cluster_id: str) -> None:
         """Ensures that given cluster is running, regardless of the current state"""
         timeout = datetime.timedelta(minutes=20)
         deadline = time.time() + timeout.total_seconds()
-        cluster_running = False
-        while time.time() < deadline and not cluster_running:
+        while time.time() < deadline:
             try:
                 state = compute.State
                 info = self.get(cluster_id)
                 if info.state == state.RUNNING:
-                    cluster_running = True
+                    return
                 elif info.state == state.TERMINATED:
                     self.start(cluster_id).result()
-                    cluster_running = True
+                    return
                 elif info.state == state.TERMINATING:
                     self.wait_get_cluster_terminated(cluster_id)
                     self.start(cluster_id).result()
-                    cluster_running = True
+                    return
                 elif info.state in (state.PENDING, state.RESIZING, state.RESTARTING):
                     self.wait_get_cluster_running(cluster_id)
-                    cluster_running = True
+                    return
                 elif info.state in (state.ERROR, state.UNKNOWN):
                     raise RuntimeError(f'Cluster {info.cluster_name} is {info.state}: {info.state_message}')
             except OperationFailed as e:
                 _LOG.debug('Operation failed, retrying', exc_info=e)
-
-        if not cluster_running:
-            raise TimeoutError(f'timed out after {timeout}')
+        raise TimeoutError(f'timed out after {timeout}')

0 comments on commit b4d298a

Please sign in to comment.