Skip to content

Commit

Permalink
Merge branch 'main' into build-no-jdk-distributions
Browse files Browse the repository at this point in the history
  • Loading branch information
dblock authored Oct 25, 2022
2 parents 4530648 + 2268af2 commit a1b6ad6
Show file tree
Hide file tree
Showing 31 changed files with 388 additions and 55 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Add dev help in gradle check CI failures ([4872](https://github.com/opensearch-project/OpenSearch/pull/4872))
- Copy `build.sh` over from opensearch-build ([#4887](https://github.com/opensearch-project/OpenSearch/pull/4887))
- Add project health badges to the README.md ([#4843](https://github.com/opensearch-project/OpenSearch/pull/4843))
- Added changes for graceful node decommission ([#4586](https://github.com/opensearch-project/OpenSearch/pull/4586))
- Build no-jdk distributions as part of release build ([#4902](https://github.com/opensearch-project/OpenSearch/pull/4902))

### Dependencies
Expand Down Expand Up @@ -72,8 +73,9 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Exclude jettison version brought in with hadoop-minicluster. ([#4787](https://github.com/opensearch-project/OpenSearch/pull/4787))
- Bump protobuf-java to 3.21.7 in repository-gcs and repository-hdfs ([#4790](https://github.com/opensearch-project/OpenSearch/pull/4790))
- Bump reactor-netty-http to 1.0.24 in repository-azure ([#4880](https://github.com/opensearch-project/OpenSearch/pull/4880))
- Bumps `protobuf-java` from 3.21.7 to 3.21.8
- Bumps `protobuf-java` from 3.21.7 to 3.21.8 ([#4886](https://github.com/opensearch-project/OpenSearch/pull/4886))
- Upgrade netty to 4.1.84.Final ([#4893](https://github.com/opensearch-project/OpenSearch/pull/4893))
- Dependency updates: asm 9.3 -> 9.4, bytebuddy 1.12.12 -> 1.12.18 ([#4889](https://github.com/opensearch-project/OpenSearch/pull/4889))

### Changed
- Dependency updates (httpcore, mockito, slf4j, httpasyncclient, commons-codec) ([#4308](https://github.com/opensearch-project/OpenSearch/pull/4308))
Expand Down
6 changes: 4 additions & 2 deletions buildSrc/version.properties
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ jackson_databind = 2.13.4.2
snakeyaml = 1.32
icu4j = 70.1
supercsv = 2.4.0
# Update to 2.17.2+ is breaking OpenSearchJsonLayout (see https://issues.apache.org/jira/browse/LOG4J2-3562)
log4j = 2.17.1
slf4j = 1.7.36
asm = 9.3
asm = 9.4
jettison = 1.5.1

# when updating the JNA version, also update the version in buildSrc/build.gradle
Expand Down Expand Up @@ -45,9 +46,10 @@ bouncycastle=1.70
randomizedrunner = 2.7.1
junit = 4.13.2
hamcrest = 2.1
# Update to 4.8.0 is using reflection without SecurityManager checks (fails with java.security.AccessControlException)
mockito = 4.7.0
objenesis = 3.2
bytebuddy = 1.12.12
bytebuddy = 1.12.18

# benchmark dependencies
jmh = 1.35
1 change: 0 additions & 1 deletion modules/lang-expression/licenses/asm-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-expression/licenses/asm-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
b4e0e2d2e023aa317b7cfcfc916377ea348e07d1
1 change: 0 additions & 1 deletion modules/lang-expression/licenses/asm-commons-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-expression/licenses/asm-commons-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8fc2810ddbcbbec0a8bbccb3f8eda58321839912
1 change: 0 additions & 1 deletion modules/lang-expression/licenses/asm-tree-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-expression/licenses/asm-tree-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a99175a17d7fdc18cbcbd0e8ea6a5d276844190a
1 change: 0 additions & 1 deletion modules/lang-painless/licenses/asm-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-painless/licenses/asm-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
b4e0e2d2e023aa317b7cfcfc916377ea348e07d1
1 change: 0 additions & 1 deletion modules/lang-painless/licenses/asm-analysis-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-painless/licenses/asm-analysis-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0a5fec9dfc039448d4fd098fbaffcaf55373b223
1 change: 0 additions & 1 deletion modules/lang-painless/licenses/asm-commons-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-painless/licenses/asm-commons-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8fc2810ddbcbbec0a8bbccb3f8eda58321839912
1 change: 0 additions & 1 deletion modules/lang-painless/licenses/asm-tree-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-painless/licenses/asm-tree-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a99175a17d7fdc18cbcbd0e8ea6a5d276844190a
1 change: 0 additions & 1 deletion modules/lang-painless/licenses/asm-util-9.3.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions modules/lang-painless/licenses/asm-util-9.4.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ab1e0a84b72561dbaf1ee260321e72148ebf4b19
Original file line number Diff line number Diff line change
Expand Up @@ -120,38 +120,53 @@ public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionEx

logger.info("--> starting decommissioning nodes in zone {}", 'c');
DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c");
// Set the timeout to 0 to do immediate Decommission
DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute);
decommissionRequest.setNoDelay(true);
DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get();
assertTrue(decommissionResponse.isAcknowledged());

logger.info("--> Received decommissioning nodes in zone {}", 'c');
// Keep some delay for scheduler to invoke decommission flow
Thread.sleep(500);

// Will wait for all events to complete
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

logger.info("--> Received LANGUID event");

// assert that decommission status is successful
GetDecommissionStateResponse response = client().execute(
GetDecommissionStateResponse response = client(clusterManagerNodes.get(0)).execute(
GetDecommissionStateAction.INSTANCE,
new GetDecommissionStateRequest(decommissionAttribute.attributeName())
).get();
assertEquals(response.getAttributeValue(), decommissionAttribute.attributeValue());
assertEquals(response.getDecommissionStatus(), DecommissionStatus.SUCCESSFUL);
assertEquals(DecommissionStatus.SUCCESSFUL, response.getDecommissionStatus());

logger.info("--> Decommission status is successful");
ClusterState clusterState = client(clusterManagerNodes.get(0)).admin().cluster().prepareState().execute().actionGet().getState();
assertEquals(4, clusterState.nodes().getSize());

logger.info("--> Got cluster state with 4 nodes.");
// assert status on nodes that are part of cluster currently
Iterator<DiscoveryNode> discoveryNodeIterator = clusterState.nodes().getNodes().valuesIt();
DiscoveryNode clusterManagerNodeAfterDecommission = null;
while (discoveryNodeIterator.hasNext()) {
// assert no node has decommissioned attribute
DiscoveryNode node = discoveryNodeIterator.next();
assertNotEquals(node.getAttributes().get("zone"), "c");

if (node.isClusterManagerNode()) {
clusterManagerNodeAfterDecommission = node;
}
// assert all the nodes has status as SUCCESSFUL
ClusterService localNodeClusterService = internalCluster().getInstance(ClusterService.class, node.getName());
assertEquals(
localNodeClusterService.state().metadata().decommissionAttributeMetadata().status(),
DecommissionStatus.SUCCESSFUL
);
}
assertNotNull("Cluster Manager not found after decommission", clusterManagerNodeAfterDecommission);
logger.info("--> Cluster Manager node found after decommission");

// assert status on decommissioned node
// Here we will verify that until it got kicked out, it received appropriate status updates
Expand All @@ -163,16 +178,18 @@ public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionEx
decommissionedNodeClusterService.state().metadata().decommissionAttributeMetadata().status(),
DecommissionStatus.IN_PROGRESS
);
logger.info("--> Verified the decommissioned node Has in progress state.");

// Will wait for all events to complete
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

client(clusterManagerNodeAfterDecommission.getName()).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
logger.info("--> Got LANGUID event");
// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(clusterManagerNodes.get(0)).execute(
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(clusterManagerNodeAfterDecommission.getName()).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());
logger.info("--> Deleting decommission done.");

// will wait for cluster to stabilise with a timeout of 2 min (findPeerInterval for decommissioned nodes)
// as by then all nodes should have joined the cluster
Expand Down Expand Up @@ -201,6 +218,7 @@ public void testDecommissionFailedWhenAttributeNotWeighedAway() throws Exception

DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c");
DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute);
decommissionRequest.setNoDelay(true);
assertBusy(() -> {
DecommissioningFailedException ex = expectThrows(
DecommissioningFailedException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.opensearch.common.Strings;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.common.unit.TimeValue;

import java.io.IOException;

Expand All @@ -28,8 +29,15 @@
*/
public class DecommissionRequest extends ClusterManagerNodeRequest<DecommissionRequest> {

public static final TimeValue DEFAULT_NODE_DRAINING_TIMEOUT = TimeValue.timeValueSeconds(120);

private DecommissionAttribute decommissionAttribute;

private TimeValue delayTimeout = DEFAULT_NODE_DRAINING_TIMEOUT;

// holder for no_delay param. To avoid draining time timeout.
private boolean noDelay = false;

public DecommissionRequest() {}

public DecommissionRequest(DecommissionAttribute decommissionAttribute) {
Expand All @@ -39,12 +47,14 @@ public DecommissionRequest(DecommissionAttribute decommissionAttribute) {
public DecommissionRequest(StreamInput in) throws IOException {
super(in);
decommissionAttribute = new DecommissionAttribute(in);
this.delayTimeout = in.readTimeValue();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
decommissionAttribute.writeTo(out);
out.writeTimeValue(delayTimeout);
}

/**
Expand All @@ -65,6 +75,19 @@ public DecommissionAttribute getDecommissionAttribute() {
return this.decommissionAttribute;
}

public TimeValue getDelayTimeout() {
return this.delayTimeout;
}

public void setNoDelay(boolean noDelay) {
this.delayTimeout = TimeValue.ZERO;
this.noDelay = noDelay;
}

public boolean isNoDelay() {
return noDelay;
}

@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
Expand All @@ -74,6 +97,14 @@ public ActionRequestValidationException validate() {
if (decommissionAttribute.attributeValue() == null || Strings.isEmpty(decommissionAttribute.attributeValue())) {
validationException = addValidationError("attribute value is missing", validationException);
}
// This validation should not fail since we are not allowing delay timeout to be set externally.
// Still keeping it for double check.
if (noDelay && delayTimeout.getSeconds() > 0) {
final String validationMessage = "Invalid decommission request. no_delay is true and delay_timeout is set to "
+ delayTimeout.getSeconds()
+ "] Seconds";
validationException = addValidationError(validationMessage, validationException);
}
return validationException;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,6 @@ protected ClusterBlockException checkBlock(DecommissionRequest request, ClusterS
protected void clusterManagerOperation(DecommissionRequest request, ClusterState state, ActionListener<DecommissionResponse> listener)
throws Exception {
logger.info("starting awareness attribute [{}] decommissioning", request.getDecommissionAttribute().toString());
decommissionService.startDecommissionAction(request.getDecommissionAttribute(), listener);
decommissionService.startDecommissionAction(request, listener);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.IOException;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;

/**
* Contains metadata about decommission attribute
Expand Down Expand Up @@ -88,11 +89,14 @@ public synchronized void validateNewStatus(DecommissionStatus newStatus) {
}
// We don't expect that INIT will be new status, as it is registered only when starting the decommission action
switch (newStatus) {
case DRAINING:
validateStatus(Set.of(DecommissionStatus.INIT), newStatus);
break;
case IN_PROGRESS:
validateStatus(DecommissionStatus.INIT, newStatus);
validateStatus(Set.of(DecommissionStatus.DRAINING, DecommissionStatus.INIT), newStatus);
break;
case SUCCESSFUL:
validateStatus(DecommissionStatus.IN_PROGRESS, newStatus);
validateStatus(Set.of(DecommissionStatus.IN_PROGRESS), newStatus);
break;
default:
throw new IllegalArgumentException(
Expand All @@ -101,17 +105,17 @@ public synchronized void validateNewStatus(DecommissionStatus newStatus) {
}
}

private void validateStatus(DecommissionStatus expected, DecommissionStatus next) {
if (status.equals(expected) == false) {
private void validateStatus(Set<DecommissionStatus> expectedStatuses, DecommissionStatus next) {
if (expectedStatuses.contains(status) == false) {
assert false : "can't move decommission status to ["
+ next
+ "]. current status: ["
+ status
+ "] (expected ["
+ expected
+ "] (allowed statuses ["
+ expectedStatuses
+ "])";
throw new IllegalStateException(
"can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expected + "])"
"can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expectedStatuses + "])"
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction;
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest;
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse;
import org.opensearch.action.admin.cluster.node.stats.NodeStats;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsAction;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.ClusterStateObserver;
import org.opensearch.cluster.ClusterStateTaskConfig;
Expand All @@ -32,14 +36,17 @@
import org.opensearch.common.Strings;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.http.HttpStats;
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.transport.TransportException;
import org.opensearch.transport.TransportResponseHandler;
import org.opensearch.transport.TransportService;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
Expand Down Expand Up @@ -271,4 +278,61 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
}
});
}

private void logActiveConnections(NodesStatsResponse nodesStatsResponse) {
if (nodesStatsResponse == null || nodesStatsResponse.getNodes() == null) {
logger.info("Node stats response received is null/empty.");
return;
}

Map<String, Long> nodeActiveConnectionMap = new HashMap<>();
List<NodeStats> responseNodes = nodesStatsResponse.getNodes();
for (int i = 0; i < responseNodes.size(); i++) {
HttpStats httpStats = responseNodes.get(i).getHttp();
DiscoveryNode node = responseNodes.get(i).getNode();
nodeActiveConnectionMap.put(node.getId(), httpStats.getServerOpen());
}
logger.info("Decommissioning node with connections : [{}]", nodeActiveConnectionMap);
}

void getActiveRequestCountOnDecommissionedNodes(Set<DiscoveryNode> decommissionedNodes) {
if (decommissionedNodes == null || decommissionedNodes.isEmpty()) {
return;
}
String[] nodes = decommissionedNodes.stream().map(DiscoveryNode::getId).toArray(String[]::new);
if (nodes.length == 0) {
return;
}

final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodes);
nodesStatsRequest.clear();
nodesStatsRequest.addMetric(NodesStatsRequest.Metric.HTTP.metricName());

transportService.sendRequest(
transportService.getLocalNode(),
NodesStatsAction.NAME,
nodesStatsRequest,
new TransportResponseHandler<NodesStatsResponse>() {
@Override
public void handleResponse(NodesStatsResponse response) {
logActiveConnections(response);
}

@Override
public void handleException(TransportException exp) {
logger.error("Failure occurred while dumping connection for decommission nodes - ", exp.unwrapCause());
}

@Override
public String executor() {
return ThreadPool.Names.SAME;
}

@Override
public NodesStatsResponse read(StreamInput in) throws IOException {
return new NodesStatsResponse(in);
}
}
);
}
}
Loading

0 comments on commit a1b6ad6

Please sign in to comment.