Skip to content

Commit b1a7743

Browse files
authored
Separating remote download and publication stats (#16682)
* Separating remote download and publication stats Signed-off-by: Himshikha Gupta <himshikh@amazon.com>
1 parent 05513df commit b1a7743

File tree

7 files changed

+228
-167
lines changed

7 files changed

+228
-167
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2020
- Add vertical scaling and SoftReference for snapshot repository data cache ([#16489](https://github.com/opensearch-project/OpenSearch/pull/16489))
2121
- Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271))
2222
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
23+
- Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
2324

2425
### Dependencies
2526
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,9 @@ PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest
298298
}
299299
} catch (Exception e) {
300300
if (applyFullState) {
301-
remoteClusterStateService.fullDownloadFailed();
301+
remoteClusterStateService.fullIncomingPublicationFailed();
302302
} else {
303-
remoteClusterStateService.diffDownloadFailed();
303+
remoteClusterStateService.diffIncomingPublicationFailed();
304304
}
305305
throw e;
306306
}

server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java

Lines changed: 174 additions & 148 deletions
Large diffs are not rendered by default.

server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@
2020
public class RemoteDownloadStats extends PersistedStateStats {
2121
static final String CHECKSUM_VALIDATION_FAILED_COUNT = "checksum_validation_failed_count";
2222
private AtomicLong checksumValidationFailedCount = new AtomicLong(0);
23+
public static final String INCOMING_PUBLICATION_FAILED_COUNT = "incoming_publication_failed_count";
24+
private AtomicLong incomingPublicationFailedCount = new AtomicLong(0);
2325

2426
public RemoteDownloadStats(String statsName) {
2527
super(statsName);
2628
addToExtendedFields(CHECKSUM_VALIDATION_FAILED_COUNT, checksumValidationFailedCount);
29+
addToExtendedFields(INCOMING_PUBLICATION_FAILED_COUNT, incomingPublicationFailedCount);
2730
}
2831

2932
public void checksumValidationFailedCount() {
@@ -33,4 +36,12 @@ public void checksumValidationFailedCount() {
3336
public long getChecksumValidationFailedCount() {
3437
return checksumValidationFailedCount.get();
3538
}
39+
40+
public void incomingPublicationFailedCount() {
41+
incomingPublicationFailedCount.incrementAndGet();
42+
}
43+
44+
public long getIncomingPublicationFailedCount() {
45+
return incomingPublicationFailedCount.get();
46+
}
3647
}

server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ public long getStateFullDownloadValidationFailed() {
106106
return remoteFullDownloadStats.getChecksumValidationFailedCount();
107107
}
108108

109+
public void stateDiffIncomingPublicationFailed() {
110+
remoteDiffDownloadStats.incomingPublicationFailedCount();
111+
}
112+
113+
public void stateFullIncomingPublicationFailed() {
114+
remoteFullDownloadStats.incomingPublicationFailedCount();
115+
}
116+
109117
public PersistedStateStats getUploadStats() {
110118
return remoteUploadStats;
111119
}

server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.opensearch.gateway.remote.ClusterMetadataManifest;
5353
import org.opensearch.gateway.remote.ClusterStateDiffManifest;
5454
import org.opensearch.gateway.remote.RemoteClusterStateService;
55+
import org.opensearch.gateway.remote.RemoteDownloadStats;
5556
import org.opensearch.node.Node;
5657
import org.opensearch.telemetry.tracing.noop.NoopTracer;
5758
import org.opensearch.test.OpenSearchTestCase;
@@ -64,10 +65,12 @@
6465
import java.util.Collections;
6566
import java.util.Map;
6667
import java.util.Optional;
68+
import java.util.concurrent.atomic.AtomicLong;
6769
import java.util.function.Function;
6870

6971
import org.mockito.Mockito;
7072

73+
import static org.opensearch.gateway.remote.RemoteDownloadStats.INCOMING_PUBLICATION_FAILED_COUNT;
7174
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
7275
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
7376
import static org.hamcrest.Matchers.containsString;
@@ -180,8 +183,8 @@ public void testHandleIncomingRemotePublishRequestWhenNoCurrentPublishRequest()
180183
() -> handler.handleIncomingRemotePublishRequest(remotePublishRequest)
181184
);
182185
assertThat(e.getMessage(), containsString("publication to self failed"));
183-
verify(remoteClusterStateService, times(0)).fullDownloadFailed();
184-
verify(remoteClusterStateService, times(1)).diffDownloadFailed();
186+
verify(remoteClusterStateService, times(0)).fullIncomingPublicationFailed();
187+
verify(remoteClusterStateService, times(1)).diffIncomingPublicationFailed();
185188
verifyNoMoreInteractions(remoteClusterStateService);
186189
}
187190

@@ -207,8 +210,8 @@ public void testHandleIncomingRemotePublishRequestWhenTermMismatch() {
207210
() -> handler.handleIncomingRemotePublishRequest(remotePublishRequest)
208211
);
209212
assertThat(e.getMessage(), containsString("publication to self failed"));
210-
verify(remoteClusterStateService, times(0)).fullDownloadFailed();
211-
verify(remoteClusterStateService, times(1)).diffDownloadFailed();
213+
verify(remoteClusterStateService, times(0)).fullIncomingPublicationFailed();
214+
verify(remoteClusterStateService, times(1)).diffIncomingPublicationFailed();
212215
verifyNoMoreInteractions(remoteClusterStateService);
213216
}
214217

@@ -234,8 +237,8 @@ public void testHandleIncomingRemotePublishRequestWhenVersionMismatch() {
234237
() -> handler.handleIncomingRemotePublishRequest(remotePublishRequest)
235238
);
236239
assertThat(e.getMessage(), containsString("publication to self failed"));
237-
verify(remoteClusterStateService, times(1)).diffDownloadFailed();
238-
verify(remoteClusterStateService, times(0)).fullDownloadFailed();
240+
verify(remoteClusterStateService, times(1)).diffIncomingPublicationFailed();
241+
verify(remoteClusterStateService, times(0)).fullIncomingPublicationFailed();
239242
verifyNoMoreInteractions(remoteClusterStateService);
240243
}
241244

@@ -263,20 +266,20 @@ public void testHandleIncomingRemotePublishRequestForLocalNode() throws IOExcept
263266

264267
public void testDownloadRemotePersistedFullStateFailedStats() throws IOException {
265268
RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class);
266-
PersistedStateStats remoteFullDownloadStats = new PersistedStateStats("dummy_full_stats");
267-
PersistedStateStats remoteDiffDownloadStats = new PersistedStateStats("dummy_diff_stats");
269+
PersistedStateStats remoteFullDownloadStats = new RemoteDownloadStats("dummy_full_stats");
270+
PersistedStateStats remoteDiffDownloadStats = new RemoteDownloadStats("dummy_diff_stats");
268271
when(remoteClusterStateService.getFullDownloadStats()).thenReturn(remoteFullDownloadStats);
269272
when(remoteClusterStateService.getDiffDownloadStats()).thenReturn(remoteDiffDownloadStats);
270273

271274
doAnswer((i) -> {
272-
remoteFullDownloadStats.stateFailed();
275+
remoteFullDownloadStats.getExtendedFields().put(INCOMING_PUBLICATION_FAILED_COUNT, new AtomicLong(1));
273276
return null;
274-
}).when(remoteClusterStateService).fullDownloadFailed();
277+
}).when(remoteClusterStateService).fullIncomingPublicationFailed();
275278

276279
doAnswer((i) -> {
277-
remoteDiffDownloadStats.stateFailed();
280+
remoteDiffDownloadStats.getExtendedFields().put(INCOMING_PUBLICATION_FAILED_COUNT, new AtomicLong(1));
278281
return null;
279-
}).when(remoteClusterStateService).diffDownloadFailed();
282+
}).when(remoteClusterStateService).diffIncomingPublicationFailed();
280283

281284
PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty());
282285
Function<PublishRequest, PublishWithJoinResponse> handlePublishRequest = p -> expectedPublishResponse;
@@ -294,8 +297,8 @@ public void testDownloadRemotePersistedFullStateFailedStats() throws IOException
294297
handler.setCurrentPublishRequestToSelf(publishRequest);
295298

296299
assertThrows(IllegalStateException.class, () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest));
297-
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getFailedCount());
298-
assertEquals(0, remoteClusterStateService.getFullDownloadStats().getFailedCount());
300+
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getExtendedFields().get(INCOMING_PUBLICATION_FAILED_COUNT).get());
301+
assertEquals(0, remoteClusterStateService.getFullDownloadStats().getExtendedFields().get(INCOMING_PUBLICATION_FAILED_COUNT).get());
299302
}
300303

301304
public void testDownloadRemotePersistedDiffStateFailedStats() throws IOException {
@@ -309,9 +312,9 @@ public void testDownloadRemotePersistedDiffStateFailedStats() throws IOException
309312
when(remoteClusterStateService.getClusterMetadataManifestByFileName(any(), any())).thenReturn(metadataManifest);
310313

311314
doAnswer((i) -> {
312-
remoteDiffDownloadStats.stateFailed();
315+
remoteDiffDownloadStats.getExtendedFields().put(INCOMING_PUBLICATION_FAILED_COUNT, new AtomicLong(1));
313316
return null;
314-
}).when(remoteClusterStateService).diffDownloadFailed();
317+
}).when(remoteClusterStateService).diffIncomingPublicationFailed();
315318

316319
PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty());
317320
Function<PublishRequest, PublishWithJoinResponse> handlePublishRequest = p -> expectedPublishResponse;
@@ -333,7 +336,7 @@ public void testDownloadRemotePersistedDiffStateFailedStats() throws IOException
333336
handler.setCurrentPublishRequestToSelf(publishRequest);
334337

335338
assertThrows(NullPointerException.class, () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest));
336-
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getFailedCount());
339+
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getExtendedFields().get(INCOMING_PUBLICATION_FAILED_COUNT).get());
337340

338341
}
339342

server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,9 @@ public void testGetClusterStateForManifest_ExcludeEphemeral() throws IOException
962962
eq(false)
963963

964964
);
965+
assertNotNull(remoteClusterStateService.getFullDownloadStats());
966+
assertEquals(1, remoteClusterStateService.getFullDownloadStats().getSuccessCount());
967+
assertEquals(0, remoteClusterStateService.getFullDownloadStats().getFailedCount());
965968
}
966969

967970
public void testGetClusterStateFromManifest_CodecV1() throws IOException {
@@ -1296,6 +1299,9 @@ public void testGetClusterStateUsingDiff() throws IOException {
12961299
diffManifest.getClusterStateCustomDeleted().forEach(clusterStateCustomName -> {
12971300
assertFalse(updatedClusterState.customs().containsKey(clusterStateCustomName));
12981301
});
1302+
assertNotNull(remoteClusterStateService.getDiffDownloadStats());
1303+
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getSuccessCount());
1304+
assertEquals(0, remoteClusterStateService.getDiffDownloadStats().getFailedCount());
12991305
}
13001306

13011307
public void testReadClusterStateInParallel_TimedOut() throws IOException {
@@ -3421,6 +3427,9 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma
34213427
true
34223428
);
34233429
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
3430+
assertNotNull(remoteClusterStateService.getFullDownloadStats());
3431+
assertEquals(0, remoteClusterStateService.getFullDownloadStats().getSuccessCount());
3432+
assertEquals(1, remoteClusterStateService.getFullDownloadStats().getFailedCount());
34243433
}
34253434

34263435
public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException {
@@ -3717,6 +3726,9 @@ public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOExceptio
37173726
eq(false)
37183727
);
37193728
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
3729+
assertNotNull(remoteClusterStateService.getDiffDownloadStats());
3730+
assertEquals(0, remoteClusterStateService.getDiffDownloadStats().getSuccessCount());
3731+
assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getFailedCount());
37203732
}
37213733

37223734
private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {

0 commit comments

Comments
 (0)