From 1abe6b1e1bbab5b8f27ce90b4c38da16d2ce7f81 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Fri, 5 Jul 2024 09:16:37 +0530 Subject: [PATCH] Fix assertion failure while deleting remote backed index (#14601) Signed-off-by: Sachin Kale Signed-off-by: Kaushal Kumar --- .../RemoteMigrationIndexMetadataUpdateIT.java | 2 -- .../opensearch/remotestore/RemoteStoreIT.java | 16 +++++++++++++++- .../java/org/opensearch/index/IndexService.java | 17 +++++++++++++++-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java index 6885d37c4aab0..216c104dfecc1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java @@ -275,7 +275,6 @@ initalMetadataVersion < internalCluster().client() * After shard relocation completes, shuts down the docrep nodes and asserts remote * index settings are applied even when the index is in YELLOW state */ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13737") public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Exception { internalCluster().startClusterManagerOnlyNode(); @@ -332,7 +331,6 @@ public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Except * After shard relocation completes, restarts the docrep node holding extra replica shard copy * and asserts remote index settings are applied as soon as the docrep replica copy is unassigned */ - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13871") public void testIndexSettingsUpdatedWhenDocrepNodeIsRestarted() throws Exception { internalCluster().startClusterManagerOnlyNode(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 96d6338e5913b..194dce5f4a57a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -65,7 +65,6 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; import static org.opensearch.index.shard.IndexShardTestCase.getTranslog; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; -import static org.opensearch.test.OpenSearchTestCase.getShardLevelBlobPath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.comparesEqualTo; @@ -133,6 +132,21 @@ private void testPeerRecovery(int numberOfIterations, boolean invokeFlush) throw ); } + public void testRemoteStoreIndexCreationAndDeletionWithReferencedStore() throws InterruptedException, ExecutionException { + String dataNode = internalCluster().startNodes(1).get(0); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureYellowAndNoInitializingShards(INDEX_NAME); + ensureGreen(INDEX_NAME); + + IndexShard indexShard = getIndexShard(dataNode, INDEX_NAME); + + // Simulating a condition where store is already in use by increasing ref count, this helps in testing index + // deletion when refresh is in-progress. + indexShard.store().incRef(); + assertAcked(client().admin().indices().prepareDelete(INDEX_NAME)); + indexShard.store().decRef(); + } + public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataFlush() throws Exception { testPeerRecovery(1, true); } diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 1c0db0095bb98..12b02d3dbd6fa 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -602,7 +602,21 @@ public synchronized IndexShard createShard( this.indexSettings.getRemoteStorePathStrategy() ); } - remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, lock, Store.OnClose.EMPTY, path); + // When an instance of Store is created, a shardlock is created which is released on closing the instance of store. + // Currently, we create 2 instances of store for remote store backed indices: store and remoteStore. + // As there can be only one shardlock acquired for a given shard, the lock is shared between store and remoteStore. + // This creates an issue when we are deleting the index as it results in closing both store and remoteStore. + // Sample test failure: https://github.com/opensearch-project/OpenSearch/issues/13871 + // The following method provides ShardLock that is not maintained by NodeEnvironment. + // As part of https://github.com/opensearch-project/OpenSearch/issues/13075, we want to move away from keeping 2 + // store instances. + ShardLock remoteStoreLock = new ShardLock(shardId) { + @Override + protected void closeInternal() { + // Do nothing for shard lock on remote store + } + }; + remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, remoteStoreLock, Store.OnClose.EMPTY, path); } else { // Disallow shards with remote store based settings to be created on non-remote store enabled nodes // Even though we have `RemoteStoreMigrationAllocationDecider` in place to prevent something like this from happening at the @@ -625,7 +639,6 @@ public synchronized IndexShard createShard( } else { directory = directoryFactory.newDirectory(this.indexSettings, path); } - store = new Store( shardId, this.indexSettings,