Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x]
### Added
- Add request param in _cluster/stats API for including/excluding mapping stats and analysis stats in response. ([#15377](https://github.com/opensearch-project/OpenSearch/pull/15377))
- [Offline Nodes] Adds offline-tasks library containing various interfaces to be used for Offline Background Tasks. ([#13574](https://github.com/opensearch-project/OpenSearch/pull/13574))
- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972))
- [Workload Management] Add queryGroupId to Task ([14708](https://github.com/opensearch-project/OpenSearch/pull/14708))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,49 @@ public void testFieldTypes() {
}
}

public void testIndicesStatsAnalysisAndMappingStatsInclusion() {
internalCluster().startNode();
ensureGreen();

client().admin().indices().prepareCreate("test1").setMapping("{\"properties\":{\"foo\":{\"type\": \"keyword\"}}}").get();
client().admin()
.indices()
.prepareCreate("test2")
.setMapping(
"{\"properties\":{\"foo\":{\"type\": \"keyword\"},\"bar\":{\"properties\":{\"baz\":{\"type\":\"keyword\"},"
+ "\"eggplant\":{\"type\":\"integer\"}}}}}"
)
.get();

ClusterStatsResponse response = client().admin()
.cluster()
.prepareClusterStats()
.useAggregatedNodeLevelResponses(randomBoolean())
.includeMappingStats(false)
.includeAnalysisStats(false)
.get();
assertNull(response.getIndicesStats().getMappings());
assertNull(response.getIndicesStats().getAnalysis());

response = client().admin().cluster().prepareClusterStats().useAggregatedNodeLevelResponses(randomBoolean()).get();
assertNotNull(response.getIndicesStats().getAnalysis());
assertThat(response.getIndicesStats().getMappings().getFieldTypeStats().size(), equalTo(3));
Set<IndexFeatureStats> stats = response.getIndicesStats().getMappings().getFieldTypeStats();
for (IndexFeatureStats stat : stats) {
switch (stat.getName()) {
case "integer":
assertThat(stat.getCount(), greaterThanOrEqualTo(1));
break;
case "keyword":
assertThat(stat.getCount(), greaterThanOrEqualTo(3));
break;
case "object":
assertThat(stat.getCount(), greaterThanOrEqualTo(1));
break;
}
}
}

public void testNodeRolesWithMasterLegacySettings() throws ExecutionException, InterruptedException {
int total = 1;
Settings legacyMasterSettings = Settings.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
@PublicApi(since = "1.0.0")
public class ClusterStatsRequest extends BaseNodesRequest<ClusterStatsRequest> {

private boolean includeMappingStats = true;

private boolean includeAnalysisStats = true;

public ClusterStatsRequest(StreamInput in) throws IOException {
super(in);
if (in.getVersion().onOrAfter(Version.V_2_16_0)) {
Expand All @@ -73,6 +77,22 @@ public void useAggregatedNodeLevelResponses(boolean useAggregatedNodeLevelRespon
this.useAggregatedNodeLevelResponses = useAggregatedNodeLevelResponses;
}

public void setIncludeMappingStats(boolean setIncludeMappingStats) {
this.includeMappingStats = setIncludeMappingStats;
}

public boolean isIncludeMappingStats() {
return includeMappingStats;
}

public void setIncludeAnalysisStats(boolean setIncludeAnalysisStats) {
this.includeAnalysisStats = setIncludeAnalysisStats;
}

public boolean isIncludeAnalysisStats() {
return includeAnalysisStats;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,14 @@ public final ClusterStatsRequestBuilder useAggregatedNodeLevelResponses(boolean
request.useAggregatedNodeLevelResponses(useAggregatedNodeLevelResponses);
return this;
}

public ClusterStatsRequestBuilder includeMappingStats(boolean value) {
request.setIncludeMappingStats(value);
return this;
}

public ClusterStatsRequestBuilder includeAnalysisStats(boolean value) {
request.setIncludeAnalysisStats(value);
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,35 @@
this.status = status;
}

public ClusterStatsResponse(
long timestamp,
String clusterUUID,
ClusterName clusterName,
List<ClusterStatsNodeResponse> nodes,
List<FailedNodeException> failures,
ClusterState state,
ClusterStatsRequest request
) {
super(clusterName, nodes, failures);
this.clusterUUID = clusterUUID;
this.timestamp = timestamp;
nodesStats = new ClusterStatsNodes(nodes);
indicesStats = new ClusterStatsIndices(
nodes,
request.isIncludeMappingStats() ? MappingStats.of(state) : null,
request.isIncludeAnalysisStats() ? AnalysisStats.of(state) : null
);
ClusterHealthStatus status = null;
for (ClusterStatsNodeResponse response : nodes) {
// only the cluster-manager node populates the status
if (response.clusterStatus() != null) {
status = response.clusterStatus();
break;
}
}

Check warning on line 133 in server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsResponse.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsResponse.java#L133

Added line #L133 was not covered by tests
this.status = status;
}

public String getClusterUUID() {
return this.clusterUUID;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ protected ClusterStatsResponse newResponse(
clusterService.getClusterName(),
responses,
failures,
state
state,
request
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
clusterStatsRequest.timeout(request.param("timeout"));
clusterStatsRequest.setIncludeDiscoveryNodes(false);
clusterStatsRequest.useAggregatedNodeLevelResponses(true);
clusterStatsRequest.setIncludeMappingStats(request.paramAsBoolean("include_mapping_stats", true));
clusterStatsRequest.setIncludeAnalysisStats(request.paramAsBoolean("include_analysis_stats", true));
Comment on lines +71 to +72
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

APIs like Index Stats, Node Stats already provide a way to handle filtering the required stats through URI Path. Is it possible to implement a similar logic here?

e.g. index stats /{index}/_stats/{metric}
e.g. node stats /_nodes/{nodeId}/stats/{metric}/{index_metric}

We can add a similar route like /_cluster/stats/{metrics}/{sub_metrics} where metrics can be nodes or indices or both. And sub metrics can be specific details within stats like mappings, analysis

This should help maintain a common pattern across different stats APIS

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the inputs @mgodwan but the use-case here is little different. In the the URI path approach client can fetch stats for a specific metric whereas for this issue we want to add support for filtering out some information from the overall response that client does not need.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we want to add support for filtering out some information from the overall response that client does not need.

The motivation behind this PR seems to be #14447 which mentions Reduced Overhead and Fine-Grained Control as the benefits

Hence, opting-in to fetch required metrics still seem to be a way which we should explore to handle as it aligns with these benefits while keeping the API signature streamlined across different stats APIs.

e.g. if new fields are added to the cluster stats response in a new OS version, users may start to see the overhead similar to what is mentioned in the parent issue due to some new computation happening. Opt-in mechanism like URI path ensures users only get what they want from cluster stats if they are sensitive to performance of these APIs or are concerned about computation overhead of these on the cluster. And as existing APIs work, if no filter is provided, it defaults to ALL which means all stats should be returned, and continues to maintain a backward compatible way for this existing API as well.

Copy link
Contributor Author

@SwethaGuptha SwethaGuptha Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get the concerns, but I don't understand how we can use the URI paths to selective exclude some stats from the default response. For APIs like _cluster/health we support request params include_defaults to additionally fetch default settings in addition to the default response. Conversely, in this use-case request params is being used to exclude some information from the default response.

By default the value of the params is set to true which ensures the default behavior of API remains unchanged and is backward compatible. If the confusion is arising from the choice of param names, would exclude_mapping_stats and exclude_analysis_stats be a better logical choice?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mgodwan I would prefer the uri based approach. But, like @SwethaGuptha mentioned we need to maintain backward compatibility so can't turn it off by default. Now, we can explore the URI approach where metric can excluded/ included in via sub-metric

Comment on lines +71 to +72
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we discussed these param names anywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No there was no separate discussion for these param names. It can be part of this PR itself? open for suggestions.

return channel -> client.admin().cluster().clusterStats(clusterStatsRequest, new NodesResponseRestListener<>(channel));
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.action.admin.cluster.stats;

import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.client.NoOpClient;
import org.junit.After;
import org.junit.Before;

public class ClusterStatsRequestBuilderTests extends OpenSearchTestCase {

private NoOpClient testClient;

@Override
@Before
public void setUp() throws Exception {
super.setUp();
this.testClient = new NoOpClient(getTestName());
}

@Override
@After
public void tearDown() throws Exception {
this.testClient.close();
super.tearDown();
}

public void testUseAggregatedNodeLevelResponses() {
ClusterStatsRequestBuilder clusterStatsRequestBuilder = new ClusterStatsRequestBuilder(
this.testClient,
ClusterStatsAction.INSTANCE
);
clusterStatsRequestBuilder.useAggregatedNodeLevelResponses(false);
assertFalse(clusterStatsRequestBuilder.request().useAggregatedNodeLevelResponses());
}

public void testIncludeMappingStats() {
ClusterStatsRequestBuilder clusterStatsRequestBuilder = new ClusterStatsRequestBuilder(
this.testClient,
ClusterStatsAction.INSTANCE
);
clusterStatsRequestBuilder.includeMappingStats(false);
assertFalse(clusterStatsRequestBuilder.request().isIncludeMappingStats());
}

public void testIncludeAnalysisStats() {
ClusterStatsRequestBuilder clusterStatsRequestBuilder = new ClusterStatsRequestBuilder(
this.testClient,
ClusterStatsAction.INSTANCE
);
clusterStatsRequestBuilder.includeAnalysisStats(false);
assertFalse(clusterStatsRequestBuilder.request().isIncludeAnalysisStats());
}
}