Skip to content

Commit

Permalink
Fix for controller error stack trace and tokenbucket (opensearch-proj…
Browse files Browse the repository at this point in the history
…ect#1978)

* Fix for controller error stack trace and tokenbucket

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Adjust logging level

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Fix concern

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Fix spotless

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

---------

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>
  • Loading branch information
b4sjoo authored Feb 2, 2024
1 parent f63c4df commit 0755e50
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ public void onResponse(DeleteResponse deleteResponse) {
@Override
public void onFailure(Exception e) {
if (e instanceof IndexNotFoundException) {
log.info("Model controller not deleted due to no model controller was found for model: " + modelId);
log.info("Model controller not deleted due to no model controller found for model: " + modelId);
actionListener.onFailure(e);
} else {
log.error("Failed to delete model controller for model: " + modelId, e);
Expand All @@ -272,9 +272,15 @@ private void deleteController(String modelId) {
if (deleteResponse.getResult() == DocWriteResponse.Result.DELETED) {
log.info("Model controller for model {} successfully deleted from index, result: {}", modelId, deleteResponse.getResult());
} else {
log.warn("The deletion of model controller for model {} returned with result: {}", modelId, deleteResponse.getResult());
log.info("The deletion of model controller for model {} returned with result: {}", modelId, deleteResponse.getResult());
}
}, e -> log.error("Failed to re-deploy the model controller for model: " + modelId, e)));
}, e -> {
if (e instanceof IndexNotFoundException) {
log.debug("Model controller not deleted due to no model controller found for model: " + modelId);
} else {
log.error("Failed to delete model controller for model: " + modelId, e);
}
}));
}

private Boolean isModelNotDeployed(MLModelState mlModelState) {
Expand Down
59 changes: 45 additions & 14 deletions plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -1233,14 +1233,14 @@ public synchronized void undeployController(String modelId, ActionListener<Strin
} else if (isModelRunningOnNode(modelId)) {
log
.error(
"Failed to undeploy model controller due to model is in ML cache but with a state other than deployed. Please check model: "
"Failed to undeploy model controller because model is in ML cache but with a state other than deployed. Please check model: "
+ modelId,
new RuntimeException()
);
listener
.onFailure(
new RuntimeException(
"Failed to undeploy model controller due to model is in ML cache but with a state other than deployed. Please check model: "
"Failed to undeploy model controller because model is in ML cache but with a state other than deployed. Please check model: "
+ modelId
)
);
Expand Down Expand Up @@ -1276,25 +1276,47 @@ private synchronized void deployControllerWithDeployingModel(
log.error("Failed to parse ml task" + r.getId(), e);
listener.onFailure(e);
}
} else if (mlModel.getIsControllerEnabled() == null || !mlModel.getIsControllerEnabled()) {
} else if (!BooleanUtils.isTrue(mlModel.getIsControllerEnabled())) {
// Not going to respond the failure here due to the model deploy can still work
// well
listener
.onResponse(
"The model "
"No controller is deployed because the model "
+ modelId
+ " is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
+ " is expected not having an enabled model controller. Please use the create controller api to create one if this is unexpected."
);
log
.warn(
"The model "
+ modelId
+ " is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
.debug(
"No controller is deployed because the model " + modelId + " is expected not having an enabled model controller."
);
} else {
listener.onFailure(new OpenSearchStatusException("Failed to find model controller", RestStatus.NOT_FOUND));
}
}, listener::onFailure));
}, e -> {
if (e instanceof IndexNotFoundException) {
if (!BooleanUtils.isTrue(mlModel.getIsControllerEnabled())) {
// Not going to respond the failure here due to the model deploy can still work
// well
listener
.onResponse(
"No controller is deployed because the model "
+ modelId
+ " is expected not having an enabled model controller. Please use the create model controller api to create one if this is unexpected."
);
log
.debug(
"No controller is deployed because the model "
+ modelId
+ " is expected not having an enabled model controller."
);
} else {
listener.onFailure(new OpenSearchStatusException("Failed to find model controller", RestStatus.NOT_FOUND));
}
} else {
log.error("Failed to re-deploy the model controller for model: " + modelId, e);
listener.onFailure(e);
}
}));
}

/**
Expand All @@ -1315,10 +1337,13 @@ public void deployControllerWithDeployingModel(MLModel mlModel, Integer eligible
deployControllerWithDeployingModel(mlModel, eligibleNodeCount, ActionListener.wrap(response -> {
if (response.startsWith("Successfully")) {
log.debug(response, mlModel.getModelId());
} else if (response.startsWith("Failed")) {
log.error(response);
} else if (response
.endsWith(
"is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
)) {
log.warn(response);
} else {
log.info(response);
log.error(response);
}
}, e -> log.error("Failed to re-deploy the model controller for model: " + mlModel.getModelId(), e)));
}
Expand Down Expand Up @@ -1364,7 +1389,13 @@ private TokenBucket createTokenBucket(Integer eligibleNodeCount, MLRateLimiter r
limit / unit.toSeconds(1),
eligibleNodeCount
);
return new TokenBucket(System::nanoTime, limit / unit.toNanos(1) / eligibleNodeCount, limit, limit / eligibleNodeCount);
// Burst token must be greater than 1 to accept request
return new TokenBucket(
System::nanoTime,
limit / unit.toNanos(1) / eligibleNodeCount,
Math.max(limit / eligibleNodeCount, 1),
Math.max(limit / eligibleNodeCount, 1)
);
}
return null;
}
Expand Down

0 comments on commit 0755e50

Please sign in to comment.