Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cluster state stats #10670

Merged
merged 12 commits into from
Oct 24, 2023
Prev Previous commit
Next Next commit
Add tests for failed count and cleanup attempt failed count
Signed-off-by: Aman Khare <amkhar@amazon.com>
  • Loading branch information
Aman Khare committed Oct 23, 2023
commit c8bfed45b627ca2ec73d71acdad8ef2e9d5fe076
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.coordination.NoClusterManagerBlockService;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.service.ClusterStateStats;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.core.xcontent.MediaTypeRegistry;
Expand Down Expand Up @@ -199,6 +200,8 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc
}

}
ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats();
assertTrue(clusterStateStats.getUpdateFailed() > 0);
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -866,13 +866,12 @@ protected void doStart() {

@Override
public DiscoveryStats stats() {
ClusterStateStats clusterStateStats = clusterManagerService.getStateStats();
ClusterStateStats clusterStateStats = clusterManagerService.getClusterStateStats();
ArrayList<PersistedStateStats> stats = new ArrayList<>();
Stream.of(PersistedStateRegistry.PersistedStateType.values()).forEach(stateType -> {
if (persistedStateRegistry.getPersistedState(stateType) != null) {
if (persistedStateRegistry.getPersistedState(stateType).getStats() != null) {
stats.add(persistedStateRegistry.getPersistedState(stateType).getStats());
}
if (persistedStateRegistry.getPersistedState(stateType) != null
&& persistedStateRegistry.getPersistedState(stateType).getStats() != null) {
stats.add(persistedStateRegistry.getPersistedState(stateType).getStats());
}
});
clusterStateStats.setPersistenceStats(stats);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public PersistedStateStats(StreamInput in) throws IOException {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(statsName);
builder.field(Fields.UPDATE_COUNT, getSuccessCount());
builder.field(Fields.SUCCESS_COUNT, getSuccessCount());
builder.field(Fields.FAILED_COUNT, getFailedCount());
builder.field(Fields.TOTAL_TIME_IN_MILLIS, getTotalTimeInMillis());
if (extendedFields.size() > 0) {
Expand Down Expand Up @@ -119,7 +119,7 @@ protected void addToExtendedFields(String extendedField, AtomicLong extendedFiel
* @opensearch.internal
*/
static final class Fields {
static final String UPDATE_COUNT = "update_count";
static final String SUCCESS_COUNT = "success_count";
static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis";
static final String FAILED_COUNT = "failed_count";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ public <T> void submitStateUpdateTasks(
}
}

public ClusterStateStats getStateStats() {
public ClusterStateStats getClusterStateStats() {
return stateStats;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ public static String encodeString(String content) {
}

public void writeMetadataFailed() {
remoteStateStats.stateFailed();
getRemoteClusterStateStats().stateFailed();
}

/**
Expand Down Expand Up @@ -1048,7 +1048,7 @@ public GlobalMetadataTransferException(String errorDesc, Throwable cause) {
* @param clusterName name of the cluster
* @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged
*/
private void deleteStaleUUIDsClusterMetadata(String clusterName, List<String> clusterUUIDs) {
void deleteStaleUUIDsClusterMetadata(String clusterName, List<String> clusterUUIDs) {
clusterUUIDs.forEach(clusterUUID -> {
getBlobStoreTransferService().deleteAsync(
ThreadPool.Names.REMOTE_PURGE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -692,8 +692,8 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
);
}
// verify stats values after state is published
assertEquals(1, clusterManagerService.getStateStats().getUpdateSuccess());
assertEquals(0, clusterManagerService.getStateStats().getUpdateFailed());
assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess());
assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import org.opensearch.gateway.PersistedClusterStateService.Writer;
import org.opensearch.gateway.remote.ClusterMetadataManifest;
import org.opensearch.gateway.remote.RemoteClusterStateService;
import org.opensearch.gateway.remote.RemotePersistenceStats;
import org.opensearch.index.recovery.RemoteStoreRestoreService;
import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult;
import org.opensearch.node.Node;
Expand Down Expand Up @@ -104,6 +105,7 @@
import static org.hamcrest.Matchers.nullValue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
Expand Down Expand Up @@ -779,6 +781,26 @@ public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOExcept
assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState));
}

public void testRemotePersistedStateFailureStats() throws IOException {
RemotePersistenceStats remoteStateStats = new RemotePersistenceStats();
final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class);
final String previousClusterUUID = "prev-cluster-uuid";
Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any());
when(remoteClusterStateService.getRemoteClusterStateStats()).thenReturn(remoteStateStats);
doCallRealMethod().when(remoteClusterStateService).writeMetadataFailed();
CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID);

final long clusterTerm = randomNonNegativeLong();
final ClusterState clusterState = createClusterState(
randomNonNegativeLong(),
Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build()
);

assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState));
assertEquals(1, remoteClusterStateService.getRemoteClusterStateStats().getFailedCount());
assertEquals(0, remoteClusterStateService.getRemoteClusterStateStats().getSuccessCount());
}

public void testGatewayForRemoteState() throws IOException {
MockGatewayMetaState gateway = null;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,24 +977,33 @@ public void testRemoteStateStats() throws IOException {
mockBlobStoreObjects();
remoteClusterStateService.start();
final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid");
final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename");
List<UploadedIndexMetadata> indices = List.of(uploadedIndexMetadata);

final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder()
.indices(indices)
.clusterTerm(1L)
.stateVersion(1L)
.stateUUID("state-uuid")
.clusterUUID("cluster-uuid")
.previousClusterUUID("prev-cluster-uuid")
.build();

assertTrue(remoteClusterStateService.getRemoteClusterStateStats() != null);
assertEquals(1, remoteClusterStateService.getRemoteClusterStateStats().getSuccessCount());
assertEquals(0, remoteClusterStateService.getRemoteClusterStateStats().getCleanupAttemptFailedCount());
assertEquals(0, remoteClusterStateService.getRemoteClusterStateStats().getFailedCount());
}

public void testRemoteStateCleanupFailureStats() throws IOException {
BlobContainer blobContainer = mock(BlobContainer.class);
doThrow(IOException.class).when(blobContainer).delete();
when(blobStore.blobContainer(any())).thenReturn(blobContainer);
BlobPath blobPath = new BlobPath().add("random-path");
when((blobStoreRepository.basePath())).thenReturn(blobPath);
remoteClusterStateService.start();
remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1"));
try {
assertBusy(() -> {
// wait for stats to get updated
assertTrue(remoteClusterStateService.getRemoteClusterStateStats() != null);
assertEquals(0, remoteClusterStateService.getRemoteClusterStateStats().getSuccessCount());
assertEquals(1, remoteClusterStateService.getRemoteClusterStateStats().getCleanupAttemptFailedCount());
});
} catch (Exception e) {
throw new RuntimeException(e);
}
}

public void testFileNames() {
final Index index = new Index("test-index", "index-uuid");
final Settings idxSettings = Settings.builder()
Expand Down
Loading