Skip to content

Commit

Permalink
Less noisy redeployment
Browse files Browse the repository at this point in the history
- If no nodes are added or removed (the received solution is identical to
  the current state), do not relabel nodes or re-send the KubeVela file.

- When an app is not in state RUNNING, discard incoming solver messages;
  the app will be redeploying or failed.

- But also, do not set the app state to FAILED just because a redeployment
  was requested while the app was not in state RUNNING.
  • Loading branch information
rudi committed Jul 10, 2024
1 parent 56bd208 commit dbc1581
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -432,12 +432,20 @@ public void onMessage(String key, String address, Map body, Message message, Con
Main.logFile("solver-solution-" + app_id + ".json", json_body.toPrettyString());
NebulousApp app = NebulousApps.get(app_id);
if (app == null) {
log.warn("Received solver solutions for non-existant application, discarding.");
log.warn("Received solver solution for non-existant application, discarding.");
return;
} else {
MDC.put("clusterName", app.getClusterName());
log.debug("Received solver solutions for application");
app.processSolution(json_body);
if (app.getState() == NebulousApp.State.RUNNING) {
log.debug("Sending solver solution to application for redeployment");
app.processSolution(json_body);
} else {
// app.State==RUNNING gets checked once more inside
// app.processSolution -- here we discard
// high-frequency solver messages early while a
// redeployment is underway.
log.warn("Received solver solution when application not in state RUNNING, discarding.");
}
}
} catch (Exception e) {
log.error("Error while processing solver solutions message", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ private static boolean waitForClusterDeploymentFinished(ExnConnector conn, Strin
* Given a KubeVela file, extract node requirements, create the job, start
* its nodes and submit KubeVela.
*
* <p>NOTE: this method modifies the NebulousApp object state, storing
* various facts about the deployed cluster.
* Note: this method is not thread-safe and should only be called from
* {@link NebulousApp#deploy()} or similarly protected code.
*
* @param app The NebulOuS app object.
* @param kubevela the KubeVela file to deploy.
Expand Down Expand Up @@ -516,11 +516,15 @@ public static void deployUnmodifiedApplication(NebulousApp app) {

/**
* Given a KubeVela file, adapt the running application to its
* specification.
* specification.<p>
*
* The KubeVela file is already rewritten with updated information from
* the solver when this method is called, so reflects the desired new
* state of the application cluster.
* state of the application cluster.<p>
*
* Note: this method is not thread-safe and should only be called from
* {@link NebulousApp#processSolution(ObjectNode)} or similarly
* protected code.
*
* @param app the NebulOuS app object.
* @param updatedKubevela the KubeVela file to deploy.
Expand All @@ -530,9 +534,8 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
String clusterName = app.getClusterName();
ExnConnector conn = app.getExnConnector();
if (!app.setStateRedeploying()) {
log.error("Trying to redeploy app that is in state {} (should be RUNNING), aborting",
log.warn("Trying to redeploy app that is in state {} (can only redeploy in state RUNNING), aborting",
app.getState().name());
app.setStateFailed();
return;
}

Expand Down Expand Up @@ -680,28 +683,32 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
Main.logFile("redeploy-worker-requirements-" + appUUID + ".txt", componentRequirements);
Main.logFile("redeploy-worker-counts-" + appUUID + ".txt", componentReplicaCounts);

if (!nodesToAdd.isEmpty()) {
log.info("Starting scaleout: {}", nodesToAdd);
Main.logFile("redeploy-scaleout-" + appUUID + ".json", nodesToAdd.toPrettyString());
conn.scaleOut(appUUID, clusterName, nodesToAdd);
waitForClusterDeploymentFinished(conn, appUUID, clusterName);
} else {
log.info("No nodes added, skipping scaleout");
}
if (!nodesToRemove.isEmpty() || !nodesToAdd.isEmpty()) {
if (!nodesToAdd.isEmpty()) {
log.info("Starting scaleout: {}", nodesToAdd);
Main.logFile("redeploy-scaleout-" + appUUID + ".json", nodesToAdd.toPrettyString());
conn.scaleOut(appUUID, clusterName, nodesToAdd);
waitForClusterDeploymentFinished(conn, appUUID, clusterName);
} else {
log.info("No nodes added, skipping scaleout");
}

log.info("Labeling nodes: {}", nodeLabels);
Main.logFile("redeploy-labelNodes-" + appUUID + ".json", nodeLabels.toPrettyString());
conn.labelNodes(appUUID, clusterName, nodeLabels);
log.info("Labeling nodes: {}", nodeLabels);
Main.logFile("redeploy-labelNodes-" + appUUID + ".json", nodeLabels.toPrettyString());
conn.labelNodes(appUUID, clusterName, nodeLabels);

log.info("Redeploying application: {}", rewritten_kubevela);
conn.deployApplication(appUUID, clusterName, app.getName(), rewritten_kubevela);
log.info("Redeploying application: {}", rewritten_kubevela);
conn.deployApplication(appUUID, clusterName, app.getName(), rewritten_kubevela);

if (!nodesToRemove.isEmpty()) {
Main.logFile("redeploy-scalein-" + appUUID + ".json", nodesToRemove);
log.info("Starting scalein: {}", nodesToRemove);
conn.scaleIn(appUUID, clusterName, nodesToRemove);
if (!nodesToRemove.isEmpty()) {
Main.logFile("redeploy-scalein-" + appUUID + ".json", nodesToRemove);
log.info("Starting scalein: {}", nodesToRemove);
conn.scaleIn(appUUID, clusterName, nodesToRemove);
} else {
log.info("No nodes removed, skipping scalein");
}
} else {
log.info("No nodes removed, skipping scalein");
log.info("Solution did not require nodes to be added or removed, done.");
}

app.setStateDeploymentFinished(componentRequirements, componentReplicaCounts,
Expand Down

0 comments on commit dbc1581

Please sign in to comment.