Skip to content

[8.0] Return 200 OK response code for a cluster health timeout (#78968) #80464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -334,14 +334,9 @@ public void testClusterHealthNotFoundIndex() throws IOException {

assertThat(response, notNullValue());
assertThat(response.isTimedOut(), equalTo(true));
assertThat(response.status(), equalTo(RestStatus.REQUEST_TIMEOUT));
assertThat(response.status(), equalTo(RestStatus.OK));
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
assertNoIndices(response);
assertCriticalWarnings(
"The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a "
+ "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and "
+ "opt in to the future behaviour now."
);
}

public void testRemoteInfo() throws Exception {
Expand Down
20 changes: 20 additions & 0 deletions docs/changelog/78968.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
pr: 78968
summary: HTTP Status code has changed for the Cluster Health API in case of a server timeout
area: CRUD
type: breaking
issues: []
breaking:
title: HTTP Status code has changed for the Cluster Health API in case of a server timeout
area: API
details: |-
The cluster health API includes options for waiting
for certain health conditions to be satisfied. If the requested conditions are
not satisfied within a timeout then ES will send back a normal response
including the field `"timed_out": true`. In earlier versions it would also use
the HTTP response code `408 Request timeout` if the request timed out, and `200
OK` otherwise. The `408 Request timeout` response code is not appropriate for
this situation, so from version 8.0.0 ES will use the response code `200 OK`
for both cases.
impact: |-
To detect a server timeout, check the `timed_out` field of the JSON response.
notable: true
17 changes: 17 additions & 0 deletions docs/reference/migration/migrate_8_0/cluster.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,21 @@ time out.
*Impact* +
Do not set `cluster.join.timeout` in your `elasticsearch.yml` file.
====

.HTTP Status code has changed for the Cluster Health API in case of a server timeout.
[%collapsible]
====
*Details* +
The {ref}/cluster-health.html[cluster health API] includes options for waiting
for certain health conditions to be satisfied. If the requested conditions are
not satisfied within a timeout then {es} will send back a normal response
including the field `"timed_out": true`. In earlier versions it would also use
the HTTP response code `408 Request timeout` if the request timed out, and `200
OK` otherwise. The `408 Request timeout` response code is not appropriate for
this situation, so from version 8.0.0 {es} will use the response code `200 OK`
for both cases.

*Impact* +
To detect a server timeout, check the `timed_out` field of the JSON response.
====
// end::notable-breaking-changes[]
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
---
"cluster health request timeout on waiting for nodes":
- skip:
version: " - 8.0.99"
reason: "Set for 7.99.99 when back-ported to 8.0"
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: 1ms
Expand All @@ -19,8 +21,10 @@

---
"cluster health request timeout waiting for active shards":
- skip:
version: " - 8.0.99"
reason: "Set for 7.99.99 when back-ported to 8.0"
- do:
catch: request_timeout
cluster.health:
timeout: 1ms
wait_for_active_shards: 5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.xcontent.StatusToXContentObject;
import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.rest.action.search.RestSearchAction;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ObjectParser;
import org.elasticsearch.xcontent.ParseField;
Expand Down Expand Up @@ -57,7 +56,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
private static final String INITIALIZING_SHARDS = "initializing_shards";
private static final String UNASSIGNED_SHARDS = "unassigned_shards";
private static final String INDICES = "indices";
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);

private static final ConstructingObjectParser<ClusterHealthResponse, Void> PARSER = new ConstructingObjectParser<>(
"cluster_health_response",
Expand Down Expand Up @@ -122,12 +120,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
XContentParser parser,
Void context,
String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout "
+ "will be changed from 408 to 200 in a future version. Set the ["
+ ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY
+ "] "
+ "query parameter to [true] to suppress this message and opt in to the future behaviour now.";

static {
// ClusterStateHealth fields
Expand Down Expand Up @@ -351,15 +343,16 @@ public String toString() {

@Override
public RestStatus status() {
if (isTimedOut() == false) {
return RestStatus.OK;
}
if (return200ForClusterHealthTimeout) {
return RestStatus.OK;
} else {
deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
return status(RestApiVersion.current());
}

@Override
public RestStatus status(RestApiVersion restApiVersion) {
// Legacy behaviour
if (isTimedOut() && restApiVersion == RestApiVersion.V_7 && return200ForClusterHealthTimeout == false) {
return RestStatus.REQUEST_TIMEOUT;
}
return RestStatus.OK;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.elasticsearch.common.xcontent;

import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xcontent.ToXContentObject;

Expand All @@ -20,4 +21,8 @@ public interface StatusToXContentObject extends ToXContentObject {
* Returns the REST status to make sure it is returned correctly
*/
RestStatus status();

default RestStatus status(RestApiVersion restApiVersion) {
return status();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public RestStatusToXContentListener(RestChannel channel, Function<Response, Stri
public RestResponse buildResponse(Response response, XContentBuilder builder) throws Exception {
assert response.isFragment() == false; // would be nice if we could make default methods final
response.toXContent(builder, channel.request());
RestResponse restResponse = new BytesRestResponse(response.status(), builder);
RestResponse restResponse = new BytesRestResponse(response.status(builder.getRestApiVersion()), builder);
if (RestStatus.CREATED == restResponse.status()) {
final String location = extractLocation.apply(response);
if (location != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.RestStatusToXContentListener;
import org.elasticsearch.rest.action.search.RestSearchAction;

import java.io.IOException;
import java.util.Collections;
Expand All @@ -30,6 +33,12 @@

public class RestClusterHealthAction extends BaseRestHandler {

private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
private static final String RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT = "return_200_for_cluster_health_timeout";
private static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "the ["
+ RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT
+ "] parameter is deprecated and will be removed in a future release.";

@Override
public List<Route> routes() {
return List.of(new Route(GET, "/_cluster/health"), new Route(GET, "/_cluster/health/{index}"));
Expand Down Expand Up @@ -81,9 +90,15 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) {
if (request.param("wait_for_events") != null) {
clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
}
clusterHealthRequest.return200ForClusterHealthTimeout(
request.paramAsBoolean("return_200_for_cluster_health_timeout", clusterHealthRequest.doesReturn200ForClusterHealthTimeout())
);
String return200ForClusterHealthTimeout = request.param(RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT);
if (return200ForClusterHealthTimeout != null) {
deprecationLogger.warn(
DeprecationCategory.API,
"cluster_health_request_timeout",
CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG
);
}
clusterHealthRequest.return200ForClusterHealthTimeout(Boolean.parseBoolean(return200ForClusterHealthTimeout));
return clusterHealthRequest;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.test.AbstractSerializingTestCase;
Expand All @@ -43,24 +44,39 @@
public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<ClusterHealthResponse> {
private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());

public void testIsTimeout() {
public void testIsTimeoutReturns200ByDefault() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
if (res.isTimedOut()) {
assertEquals(RestStatus.REQUEST_TIMEOUT, res.status());
assertCriticalWarnings(ClusterHealthResponse.CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
} else {
assertEquals(RestStatus.OK, res.status());
}
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
}
}

public void testTimeoutReturns200IfOptedIn() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
assertEquals(RestStatus.OK, res.status());
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
}
}

public void testTimeoutReturns200InIfOptedInV7CompatibilityMode() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
}
}

public void testTimeoutReturns408InV7CompatibilityMode() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
if (res.isTimedOut()) {
assertEquals(RestStatus.REQUEST_TIMEOUT, res.status(RestApiVersion.V_7));
} else {
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.rest.action.admin.cluster;

import org.apache.logging.log4j.Level;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.support.ActiveShardCount;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
Expand Down Expand Up @@ -68,6 +69,14 @@ public void testFromRequest() {
assertThat(clusterHealthRequest.waitForNodes(), equalTo(waitForNodes));
assertThat(clusterHealthRequest.waitForEvents(), equalTo(waitForEvents));
assertThat(clusterHealthRequest.doesReturn200ForClusterHealthTimeout(), equalTo(requestTimeout200));

assertWarnings(
true,
new DeprecationWarning(
Level.WARN,
"the [return_200_for_cluster_health_timeout] parameter is deprecated and will be removed in a future release."
)
);
}

private FakeRestRequest buildRestRequest(Map<String, String> params) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ teardown:

# this is a hacky way to sleep for 5s, since we will never have 10 nodes
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: "5s"
Expand Down Expand Up @@ -286,7 +285,6 @@ teardown:

# this is a hacky way to sleep for 5s, since we will never have 10 nodes
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: "5s"
Expand Down