Skip to content

Commit

Permalink
SOLR-15087: Allow restoration to existing collections (apache#2380)
Browse files Browse the repository at this point in the history
The recent addition of support for a "readonly" mode for collections
opens the door to restoring to already-existing collections.

This commit adds a codepath to allow this.  Any compatible existing
collection may be used for restoration, including the collection that
was the original source of the backup.
  • Loading branch information
gerlowskija authored Feb 17, 2021
1 parent 58acfed commit 15bd858
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ void addPropertyParams(ZkNodeProps message, Map<String, Object> map) {
}


private void modifyCollection(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results)
void modifyCollection(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results)
throws Exception {

final String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,6 @@

package org.apache.solr.cloud.api.collections;

import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.solr.cloud.DistributedClusterStateUpdater;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ShardRequestTracker;
Expand All @@ -51,6 +32,7 @@
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
Expand All @@ -67,6 +49,26 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import static org.apache.solr.common.cloud.ZkStateReader.*;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
Expand All @@ -87,8 +89,8 @@ public RestoreCmd(OverseerCollectionMessageHandler ocmh) {
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
try (RestoreContext restoreContext = new RestoreContext(message, ocmh)) {
if (state.hasCollection(restoreContext.restoreCollectionName)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Restoration collection [" + restoreContext.restoreCollectionName +
"] must be created by the backup process and cannot exist");
RestoreOnExistingCollection restoreOnExistingCollection = new RestoreOnExistingCollection(restoreContext);
restoreOnExistingCollection.process(restoreContext, results);
} else {
RestoreOnANewCollection restoreOnANewCollection = new RestoreOnANewCollection(message, restoreContext.backupCollectionState);
restoreOnANewCollection.validate(restoreContext.backupCollectionState, restoreContext.nodeList.size());
Expand Down Expand Up @@ -194,8 +196,7 @@ public void close() throws IOException {
/**
* Restoration 'strategy' that takes responsibility for creating the collection to restore to.
*
* This is currently the only supported 'strategy' for backup restoration. Though in-place restoration has been
* proposed and may be added soon (see SOLR-15087)
* @see RestoreOnExistingCollection
*/
private class RestoreOnANewCollection {
private int numNrtReplicas;
Expand Down Expand Up @@ -551,4 +552,54 @@ private void restoringAlias(BackupProperties properties) {
}
}
}

/**
 * Restoration 'strategy' that restores into a collection that already exists.
 *
 * <p>Construction validates that the existing collection has the same number of shards as the
 * backup. {@link #process} then marks the collection read-only, asks the replicas to restore, and
 * removes the read-only flag again whether or not the restore succeeded.
 *
 * @see RestoreOnANewCollection
 */
private class RestoreOnExistingCollection {

  /**
   * Checks that the target collection is shard-compatible with the backup.
   *
   * @param rc restore context supplying the backup's collection state, the ZK state reader and the
   *           name of the collection to restore into
   * @throws SolrException with {@code BAD_REQUEST} when the backup and the existing collection
   *                       have a different number of slices
   */
  private RestoreOnExistingCollection(RestoreContext rc) {
    int numShardsOfBackup = rc.backupCollectionState.getSlices().size();
    int numShards = rc.zkStateReader.getClusterState().getCollection(rc.restoreCollectionName).getSlices().size();

    if (numShardsOfBackup != numShards) {
      String msg = String.format(Locale.ROOT, "Unable to restore since number of shards in backup " +
          "and specified collection does not match, numShardsOfBackup:%d numShardsOfCollection:%d", numShardsOfBackup, numShards);
      throw new SolrException(ErrorCode.BAD_REQUEST, msg);
    }
  }

  /**
   * Restores the backup into the existing collection while it is held read-only.
   *
   * <p>The cluster state read at the start of this method is the one handed to every step.
   * The read-only flag is always cleared afterwards. If clearing it fails after the restore itself
   * has already failed, the restore failure is the exception that propagates and the cleanup
   * failure is attached to it as a suppressed exception, so the root cause is not lost.
   *
   * @param rc      restore context describing the backup and the target collection
   * @param results collector passed through to the replica restore requests
   * @throws Exception if enabling read-only, restoring, or disabling read-only fails
   */
  public void process(RestoreContext rc, @SuppressWarnings({"rawtypes"}) NamedList results) throws Exception {
    ClusterState clusterState = rc.zkStateReader.getClusterState();
    DocCollection restoreCollection = clusterState.getCollection(rc.restoreCollectionName);

    enableReadOnly(clusterState, restoreCollection);
    Exception restoreFailure = null;
    try {
      requestReplicasToRestore(results, restoreCollection, clusterState, rc.backupProperties,
          rc.backupPath, rc.repo, rc.shardHandler, rc.asyncId);
    } catch (Exception e) {
      restoreFailure = e;
      throw e;
    } finally {
      try {
        disableReadOnly(clusterState, restoreCollection);
      } catch (Exception cleanupFailure) {
        if (restoreFailure == null) {
          // Restore succeeded, so the cleanup problem is the only thing to report.
          throw cleanupFailure;
        }
        // Throwing from 'finally' would replace the restore failure; keep both instead.
        restoreFailure.addSuppressed(cleanupFailure);
      }
    }
  }

  /**
   * Issues a MODIFYCOLLECTION that sets the read-only property to {@code null} for the collection.
   */
  private void disableReadOnly(ClusterState clusterState, DocCollection restoreCollection) throws Exception {
    ZkNodeProps params = new ZkNodeProps(
        Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toString(),
        ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
        ZkStateReader.READ_ONLY, null
    );
    ocmh.modifyCollection(clusterState, params, new NamedList<>());
  }

  /**
   * Issues a MODIFYCOLLECTION that sets the read-only property to {@code "true"} for the collection.
   */
  private void enableReadOnly(ClusterState clusterState, DocCollection restoreCollection) throws Exception {
    ZkNodeProps params = new ZkNodeProps(
        Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toString(),
        ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
        ZkStateReader.READ_ONLY, "true"
    );
    ocmh.modifyCollection(clusterState, params, new NamedList<>());
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1084,11 +1084,6 @@ public Map<String, Object> execute(SolrQueryRequest req, SolrQueryResponse rsp,
req.getParams().required().check(NAME, COLLECTION_PROP);

final String collectionName = SolrIdentifierValidator.validateCollectionName(req.getParams().get(COLLECTION_PROP));
final ClusterState clusterState = h.coreContainer.getZkController().getClusterState();
//We always want to restore into an collection name which doesn't exist yet.
if (clusterState.hasCollection(collectionName)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' exists, no action taken.");
}
if (h.coreContainer.getZkController().getZkStateReader().getAliases().hasAlias(collectionName)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' is an existing alias, no action taken.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ public void execute(CoreAdminHandler.CallInfo it) throws Exception {
// this core must be the only replica in its shard otherwise
// we cannot guarantee consistency between replicas because when we add data (or restore index) to this replica
Slice slice = zkController.getClusterState().getCollection(cd.getCollectionName()).getSlice(cd.getShardId());
if (slice.getReplicas().size() != 1) {
if (slice.getReplicas().size() != 1 && !core.readOnly) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to restore core=" + core.getName() + ", the core must be the only replica in its shard");
"Failed to restore core=" + core.getName() + ", the core must be the only replica in its shard or it must be read only");
}

RestoreCore restoreCore;
Expand Down
6 changes: 4 additions & 2 deletions solr/solr-ref-guide/src/collection-management.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -1360,12 +1360,14 @@ POST http://localhost:8983/v2/collections/backups
[[restore]]
== RESTORE: Restore Collection

Restores Solr indexes and associated configurations.
Restores Solr indexes and associated configurations to a specified collection.

`/admin/collections?action=RESTORE&name=myBackupName&location=/path/to/my/shared/drive&collection=myRestoredCollectionName`

The RESTORE operation will create a collection with the specified name in the collection parameter. You cannot restore into the same collection the backup was taken from. Also the target collection should not be present at the time the API is called as Solr will create it for you.
The RESTORE operation will replace the content of a collection with files from the specified backup.

If the provided `collection` value matches an existing collection, Solr will use it for restoration, assuming it is compatible (same number of shards, etc.) with the stored backup files.
If the provided `collection` value doesn't exist, a new collection with that name is created in a way compatible with the stored backup files.
The collection created will have the same number of shards and replicas as the original collection, preserving routing information, etc. Optionally, you can override some parameters documented below.

While restoring, if a configset with the same name exists in ZooKeeper then Solr will reuse that, or else it will upload the backed up configset in ZooKeeper and use that.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.RequestStatusState;
Expand All @@ -48,6 +48,7 @@
import org.apache.solr.core.backup.ShardBackupId;
import org.apache.solr.core.backup.ShardBackupMetadata;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
Expand Down Expand Up @@ -87,20 +88,23 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase {

private static long docsSeed; // see indexDocs()
protected static final int NUM_SHARDS = 2;//granted we sometimes shard split to get more
protected static final int REPL_FACTOR = 2;
protected static final String BACKUPNAME_PREFIX = "mytestbackup";
protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";

protected String testSuffix = "test1";
protected int replFactor;
protected int numTlogReplicas;
protected int numPullReplicas;

/**
 * One-time class setup: selects {@code solr.StandardDirectoryFactory} through the
 * {@code solr.directoryFactory} system property and draws the random seed kept in
 * {@code docsSeed}.
 */
@BeforeClass
public static void createCluster() throws Exception {
  System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
  docsSeed = random().nextLong();
}

/**
 * Calls {@code TrackingBackupRepository.clear()} before each test method.
 * NOTE(review): presumably this discards whatever an earlier test recorded in the tracking
 * repository - confirm against TrackingBackupRepository.
 */
@Before
public void setUpTrackingRepo() {
TrackingBackupRepository.clear();
}

/**
* @return The name of the collection to use.
*/
Expand All @@ -114,12 +118,6 @@ public void setTestSuffix(String testSuffix) {
this.testSuffix = testSuffix;
}

// Assigns replFactor from TestUtil.nextInt(random(), 1, 2).
// Only replFactor is set here: the tlog/pull assignments below are commented out, so
// numTlogReplicas and numPullReplicas are left untouched by this method.
private void randomizeReplicaTypes() {
replFactor = TestUtil.nextInt(random(), 1, 2);
// numTlogReplicas = TestUtil.nextInt(random(), 0, 1);
// numPullReplicas = TestUtil.nextInt(random(), 0, 1);
}

/**
* @return The absolute path for the backup location.
* Could return null.
Expand All @@ -128,11 +126,10 @@ private void randomizeReplicaTypes() {

@Test
public void testSimple() throws Exception {
setTestSuffix("testbackupincsimple");
final String backupCollectionName = getCollectionName();
final String restoreCollectionName = backupCollectionName + "_restore";
TrackingBackupRepository.clear();

setTestSuffix("testbackupincsimple");
CloudSolrClient solrClient = cluster.getSolrClient();

CollectionAdminRequest
Expand Down Expand Up @@ -166,9 +163,12 @@ public void testSimple() throws Exception {
log.info("Created backup with {} docs, took {}ms", numFound, timeTaken);

t = System.nanoTime();
randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", NUM_SHARDS, 1);
CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setBackupId(0)
.setLocation(backupLocation).setRepositoryName(BACKUP_REPO_NAME).processAndWait(solrClient, 500);
.setLocation(backupLocation)
.setRepositoryName(BACKUP_REPO_NAME)
.processAndWait(solrClient, 500);
timeTaken = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t);
log.info("Restored from backup, took {}ms", timeTaken);
numFound = cluster.getSolrClient().query(restoreCollectionName,
Expand All @@ -177,18 +177,55 @@ public void testSimple() throws Exception {
}
}

/**
 * Checks that a backup can be restored into the same collection it was taken from.
 *
 * <p>Steps: create and seed the collection, back it up, index a second batch, restore, and
 * confirm the document count has gone back to the size of the first batch.
 */
@Test
public void testRestoreToOriginalCollection() throws Exception {
  setTestSuffix("testbackuprestoretooriginal");
  final String collection = getCollectionName();
  final String backupName = BACKUPNAME_PREFIX + testSuffix;
  final CloudSolrClient client = cluster.getSolrClient();

  // Create the collection that serves as both backup source and restore target, then seed it
  CollectionAdminRequest
      .createCollection(collection, "conf1", NUM_SHARDS, REPL_FACTOR)
      .process(client);
  final int docsBeforeBackup = indexDocs(collection, true);

  // Take the backup
  try (BackupRepository repo = cluster.getJettySolrRunner(0).getCoreContainer()
      .newBackupRepository(BACKUP_REPO_NAME)) {
    final String location = repo.getBackupLocation(getBackupLocation());
    final RequestStatusState backupState = CollectionAdminRequest.backupCollection(collection, backupName)
        .setLocation(location)
        .setRepositoryName(BACKUP_REPO_NAME)
        .processAndWait(client, 10 * 1000);
    assertEquals(RequestStatusState.COMPLETED, backupState);
  }

  // Index a second batch so the live collection now differs from what was backed up
  final int docsAfterBackup = indexDocs(collection, true);
  final int expectedTotal = docsAfterBackup + docsBeforeBackup;
  assertEquals(expectedTotal, getNumDocsInCollection(collection));

  // Restore over the original collection; only the first batch should remain
  try (BackupRepository repo = cluster.getJettySolrRunner(0).getCoreContainer()
      .newBackupRepository(BACKUP_REPO_NAME)) {
    final String location = repo.getBackupLocation(getBackupLocation());
    final RequestStatusState restoreState = CollectionAdminRequest.restoreCollection(collection, backupName)
        .setLocation(location)
        .setRepositoryName(BACKUP_REPO_NAME)
        .processAndWait(client, 20 * 1000);
    assertEquals(RequestStatusState.COMPLETED, restoreState);
  }
  assertEquals(docsBeforeBackup, getNumDocsInCollection(collection));
}

@Test
@Slow
@SuppressWarnings("rawtypes")
public void testBackupIncremental() throws Exception {
TrackingBackupRepository.clear();

setTestSuffix("testbackupinc");
randomizeReplicaTypes();
CloudSolrClient solrClient = cluster.getSolrClient();

CollectionAdminRequest
.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas)
.createCollection(getCollectionName(), "conf1", NUM_SHARDS, REPL_FACTOR)
.process(solrClient);

indexDocs(getCollectionName(), false);
Expand Down Expand Up @@ -341,8 +378,11 @@ private void simpleRestoreAndCheckDocCount(CloudSolrClient solrClient, String ba

String restoreCollectionName = getCollectionName() + "_restored";

randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", NUM_SHARDS, REPL_FACTOR);
CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setLocation(backupLocation).setRepositoryName(BACKUP_REPO_NAME).process(solrClient);
.setLocation(backupLocation)
.setRepositoryName(BACKUP_REPO_NAME)
.process(solrClient);

AbstractDistribZkTestBase.waitForRecoveriesToFinish(
restoreCollectionName, cluster.getSolrClient().getZkStateReader(), log.isDebugEnabled(), true, 30);
Expand Down Expand Up @@ -380,6 +420,18 @@ private int indexDocs(String collectionName, boolean useUUID) throws Exception {
return numDocs;
}

/**
 * On a random coin flip, creates the restore-target collection ahead of the restore call and
 * waits for it to become active; otherwise does nothing. This lets callers exercise restoring
 * into both an existing and a not-yet-existing collection.
 *
 * @param restoreCollectionName name of the collection the restore will target
 * @param configName            configset used if the collection is created
 * @param numShards             number of shards to create
 * @param numReplicas           number of replicas per shard to create
 */
private void randomlyPrecreateRestoreCollection(String restoreCollectionName, String configName, int numShards, int numReplicas) throws Exception {
  final boolean skipPrecreation = !random().nextBoolean();
  if (skipPrecreation) {
    return;
  }

  CollectionAdminRequest
      .createCollection(restoreCollectionName, configName, numShards, numReplicas)
      .process(cluster.getSolrClient());
  final int expectedReplicaCount = numShards * numReplicas;
  cluster.waitForActiveCollection(restoreCollectionName, numShards, expectedReplicaCount);
}

/**
 * Runs a match-all ({@code *:*}) query against the collection and returns the reported hit count.
 *
 * @param collectionName collection to query
 * @return the {@code numFound} value of the query results
 */
private long getNumDocsInCollection(String collectionName) throws Exception {
  final QueryRequest matchAllRequest = new QueryRequest(new SolrQuery("*:*"));
  return matchAllRequest
      .process(cluster.getSolrClient(), collectionName)
      .getResults()
      .getNumFound();
}

private class IncrementalBackupVerifier {
private BackupRepository repository;
private URI backupURI;
Expand Down

0 comments on commit 15bd858

Please sign in to comment.