Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gracefully handle concurrent zone decommission action #5542

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
767803e
Control concurrency and handle retries
imRishN Dec 13, 2022
743ca01
Fix spotless check
imRishN Dec 13, 2022
dbb404e
Add changelog
imRishN Dec 13, 2022
ee90987
Add request timeout param
imRishN Dec 14, 2022
5870737
Changes
imRishN Dec 16, 2022
8dd5899
Merge remote-tracking branch 'upstream/main' into decommission/handle…
imRishN Dec 16, 2022
4946914
Fix spotless check
imRishN Dec 16, 2022
4f5e73c
Fix
imRishN Dec 16, 2022
378e3d2
Refactor
imRishN Dec 27, 2022
dd1b19f
Refactor
imRishN Dec 27, 2022
e4107d5
Add test for request
imRishN Jan 5, 2023
7954fb2
Merge branch 'main' of https://github.com/opensearch-project/OpenSear…
imRishN Jan 5, 2023
3caed7b
Add tests for controller
imRishN Jan 5, 2023
8e4ab4d
Test for retry
imRishN Jan 5, 2023
a5c4e08
Fix spotless check
imRishN Jan 5, 2023
51743ce
Move check at rest layer
imRishN Jan 5, 2023
b63baf7
Fix spotless check
imRishN Jan 5, 2023
b8b1434
Minor fix
imRishN Jan 5, 2023
ad6207f
Fix spotless check
imRishN Jan 5, 2023
4801501
Fix
imRishN Jan 5, 2023
3a88fb1
Merge remote-tracking branch 'upstream/main' into decommission/handle…
imRishN Jan 9, 2023
eb5c393
Remove retry flag and use original flag
imRishN Jan 9, 2023
ed6bffb
Fix spotless check
imRishN Jan 9, 2023
c68cdc7
Refactor code
imRishN Jan 9, 2023
1368ff4
Clean up
imRishN Jan 9, 2023
8ddd128
Empty-Commit
imRishN Jan 9, 2023
aa5c4b4
Empty-Commit
imRishN Jan 9, 2023
3993de9
Empty-Commit
imRishN Jan 9, 2023
afe5f46
Cleanup
imRishN Jan 10, 2023
61c9e5d
Fix
imRishN Jan 10, 2023
0e8b76c
Throw exception in line
imRishN Jan 10, 2023
7395dff
Fixes
imRishN Jan 10, 2023
319b4b1
Add IT for concurrency
imRishN Jan 10, 2023
c46992c
Add request id to decommission request
imRishN Jan 10, 2023
d4670b7
Fix
imRishN Jan 10, 2023
7ecbe6e
Fix
imRishN Jan 10, 2023
88ec79d
Fix
imRishN Jan 10, 2023
2b40e12
Resolve comments
imRishN Jan 10, 2023
a6d0c3b
Fix test
imRishN Jan 10, 2023
927d0cc
Test fix
imRishN Jan 10, 2023
539d77a
Merge remote-tracking branch 'upstream/main' into decommission/handle…
imRishN Jan 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor
Signed-off-by: Rishab Nahata <rnnahata@amazon.com>
  • Loading branch information
imRishN committed Dec 27, 2022
commit 378e3d26dcf49166b34b39ca3310ff73395a9273
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,22 @@
*/
public class DecommissionRequest extends ClusterManagerNodeRequest<DecommissionRequest> {

public static final TimeValue TIMEOUT = TimeValue.timeValueMinutes(2L);
public static final TimeValue DEFAULT_REQUEST_TIMEOUT = TimeValue.timeValueMinutes(2L);
public static final TimeValue DEFAULT_NODE_DRAINING_TIMEOUT = TimeValue.timeValueSeconds(120);

private DecommissionAttribute decommissionAttribute;
private boolean retryOnClusterManagerChange = false;
private TimeValue timeout = TIMEOUT;
private boolean retryOnClusterManagerSwitch = false;
private TimeValue timeout = DEFAULT_REQUEST_TIMEOUT;
private TimeValue delayTimeout = DEFAULT_NODE_DRAINING_TIMEOUT;

// holder for the no_delay param, used to avoid the node draining timeout.
private boolean noDelay = false;
imRishN marked this conversation as resolved.
Show resolved Hide resolved

public DecommissionRequest() {}

public DecommissionRequest(DecommissionAttribute decommissionAttribute, boolean retryOnClusterManagerChange, TimeValue timeout) {
public DecommissionRequest(DecommissionAttribute decommissionAttribute, boolean retryOnClusterManagerSwitch, TimeValue timeout) {
this.decommissionAttribute = decommissionAttribute;
this.retryOnClusterManagerChange = retryOnClusterManagerChange;
this.retryOnClusterManagerSwitch = retryOnClusterManagerSwitch;
imRishN marked this conversation as resolved.
Show resolved Hide resolved
this.timeout = timeout;
}

Expand All @@ -55,7 +55,7 @@ public DecommissionRequest(StreamInput in) throws IOException {
decommissionAttribute = new DecommissionAttribute(in);
this.delayTimeout = in.readTimeValue();
this.noDelay = in.readBoolean();
this.retryOnClusterManagerChange = in.readBoolean();
this.retryOnClusterManagerSwitch = in.readBoolean();
this.timeout = in.readTimeValue();

}
Expand All @@ -66,7 +66,7 @@ public void writeTo(StreamOutput out) throws IOException {
decommissionAttribute.writeTo(out);
out.writeTimeValue(delayTimeout);
out.writeBoolean(noDelay);
out.writeBoolean(retryOnClusterManagerChange);
out.writeBoolean(retryOnClusterManagerSwitch);
out.writeTimeValue(timeout);
}

Expand Down Expand Up @@ -110,19 +110,19 @@ public boolean isNoDelay() {
/**
* Sets retryOnClusterManagerSwitch for decommission request
*
* @param retryOnClusterManagerChange boolean for request to retry decommission action on cluster manager change
* @param retryOnClusterManagerSwitch boolean for request to retry decommission action on cluster manager switch
* @return this request
*/
public DecommissionRequest setRetryOnClusterManagerChange(boolean retryOnClusterManagerChange) {
this.retryOnClusterManagerChange = retryOnClusterManagerChange;
public DecommissionRequest setRetryOnClusterManagerSwitch(boolean retryOnClusterManagerSwitch) {
this.retryOnClusterManagerSwitch = retryOnClusterManagerSwitch;
return this;
}

/**
* @return Returns whether decommission is retry eligible on cluster manager change
* @return Returns whether decommission is retry eligible on cluster manager switch
*/
public boolean retryOnClusterManagerChange() {
return this.retryOnClusterManagerChange;
public boolean retryOnClusterManagerSwitch() {
return this.retryOnClusterManagerSwitch;
}

/**
Expand Down Expand Up @@ -176,7 +176,7 @@ public String toString() {
+ "decommissionAttribute="
+ decommissionAttribute
+ ", retryOnClusterManagerChange="
+ retryOnClusterManagerChange
+ retryOnClusterManagerSwitch
+ ", timeout="
+ timeout
+ ", delayTimeout="
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ public DecommissionRequestBuilder setNoDelay(boolean noDelay) {
}

/**
* Sets retryOnClusterManagerChange for decommission request
* Sets retryOnClusterManagerSwitch for decommission request
*
* @param retryOnClusterManagerChange boolean for request to retry decommission action on cluster manager change
* @param retryOnClusterManagerSwitch boolean for request to retry decommission action on cluster manager switch
* @return current object
*/
public DecommissionRequestBuilder setRetryOnClusterManagerChange(boolean retryOnClusterManagerChange) {
request.setRetryOnClusterManagerChange(retryOnClusterManagerChange);
public DecommissionRequestBuilder setRetryOnClusterManagerSwitch(boolean retryOnClusterManagerSwitch) {
request.setRetryOnClusterManagerSwitch(retryOnClusterManagerSwitch);
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public void retryDecommissionAction(
listener.onFailure(new OpenSearchTimeoutException(errorMsg));
return;
}
decommissionRequest.setRetryOnClusterManagerChange(true);
decommissionRequest.setRetryOnClusterManagerSwitch(true);
decommissionRequest.setTimeout(TimeValue.timeValueMillis(remainingTimeoutMS));
transportService.sendRequest(
transportService.getLocalNode(),
imRishN marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ private static void ensureEligibleRetry(
if (decommissionAttributeMetadata != null) {
// we only need to check for INIT status here, as the other transient statuses are already handled separately
if (decommissionAttributeMetadata.status().equals(DecommissionStatus.INIT)
&& decommissionRequest.retryOnClusterManagerChange() == false) {
&& decommissionRequest.retryOnClusterManagerSwitch() == false) {
throw new DecommissioningFailedException(
decommissionRequest.getDecommissionAttribute(),
"concurrent request received to decommission attribute"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.util.List;

import static java.util.Collections.singletonList;
import static org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest.TIMEOUT;
import static org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest.DEFAULT_REQUEST_TIMEOUT;
import static org.opensearch.rest.RestRequest.Method.PUT;

/**
Expand Down Expand Up @@ -60,7 +60,7 @@ DecommissionRequest createRequest(RestRequest request) throws IOException {
decommissionRequest.setDelayTimeout(delayTimeout);
}
return decommissionRequest.setDecommissionAttribute(new DecommissionAttribute(attributeName, attributeValue))
.setRetryOnClusterManagerChange(false)
.setTimeout(TimeValue.parseTimeValue(request.param("timeout"), TIMEOUT, getClass().getSimpleName() + ".timeout"));
.setRetryOnClusterManagerSwitch(false)
.setTimeout(TimeValue.parseTimeValue(request.param("timeout"), DEFAULT_REQUEST_TIMEOUT, getClass().getSimpleName() + ".timeout"));
}
}