Skip to content

Commit

Permalink
Retry in w.clusters().ensureClusterIsRunning(id) when cluster is simultaneously started by two different processes (#134)
Browse files Browse the repository at this point in the history

This PR adds a retry for timing edge cases in which the start attempt fails
with an error like `INVALID_STATE: Cluster XXX is in unexpected state Pending.`

Other PRs: 
- databricks/databricks-sdk-py#283
- databricks/databricks-sdk-go#580
  • Loading branch information
nfx authored Aug 16, 2023
1 parent a2479d7 commit e0174d0
Showing 1 changed file with 5 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

public class ClustersExt extends ClustersAPI {
private static final Logger LOG = LoggerFactory.getLogger(ClustersExt.class);
private static final String INVALID_STATE = "INVALID_STATE";

public ClustersExt(ApiClient apiClient) {
super(apiClient);
Expand Down Expand Up @@ -217,9 +218,11 @@ public void ensureClusterIsRunning(String clusterId) throws TimeoutException {
// running, reconfiguring
LOG.debug("Cluster is {}: {}", info.getState(), info.getStateMessage());
return;
} catch (IllegalStateException e) {
LOG.debug("Cluster reached illegal state. Retrying startup", e);
} catch (DatabricksError e) {
if (e.getErrorCode().equals(INVALID_STATE)) {
LOG.debug("Cluster was started by other process: {} Retrying.", e.getMessage());
continue;
}
LOG.debug("Received {} error code", e.getErrorCode());
throw e;
}
Expand Down

0 comments on commit e0174d0

Please sign in to comment.