-
Notifications
You must be signed in to change notification settings - Fork 2.6k
SOLR-12708: Aggregate failures from downstream async jobs; add error … #444
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,9 @@ | |
import java.util.Optional; | ||
import java.util.Properties; | ||
import java.util.Set; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.TimeoutException; | ||
|
||
import org.apache.solr.client.solrj.cloud.DistributedQueue; | ||
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; | ||
|
@@ -51,6 +54,7 @@ | |
import org.apache.solr.common.params.CoreAdminParams; | ||
import org.apache.solr.common.params.ModifiableSolrParams; | ||
import org.apache.solr.common.util.NamedList; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.common.util.StrUtils; | ||
import org.apache.solr.common.util.Utils; | ||
import org.apache.solr.core.CoreContainer; | ||
|
@@ -238,9 +242,12 @@ public void call(ClusterState state, ZkNodeProps message, NamedList results) thr | |
message, sliceNames, | ||
numNrtReplicas, numTlogReplicas, numPullReplicas); | ||
sessionWrapper = PolicyHelper.getLastSessionWrapper(true); | ||
|
||
CountDownLatch countDownLatch = new CountDownLatch(restoreCollection.getSlices().size()); | ||
|
||
//Create one replica per shard and copy backed up data to it | ||
for (Slice slice : restoreCollection.getSlices()) { | ||
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection); | ||
log.info("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection); | ||
HashMap<String, Object> propMap = new HashMap<>(); | ||
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD); | ||
propMap.put(COLLECTION_PROP, restoreCollectionName); | ||
|
@@ -271,7 +278,37 @@ public void call(ClusterState state, ZkNodeProps message, NamedList results) thr | |
propMap.put(ASYNC, asyncId); | ||
} | ||
ocmh.addPropertyParams(message, propMap); | ||
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), new NamedList(), null); | ||
final NamedList addResult = new NamedList(); | ||
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), addResult, () -> { | ||
countDownLatch.countDown(); | ||
Object addResultFailure = addResult.get("failure"); | ||
if (addResultFailure != null) { | ||
SimpleOrderedMap failure = (SimpleOrderedMap) results.get("failure"); | ||
if (failure == null) { | ||
failure = new SimpleOrderedMap(); | ||
results.add("failure", failure); | ||
} | ||
failure.addAll((NamedList) addResultFailure); | ||
} else { | ||
SimpleOrderedMap success = (SimpleOrderedMap) results.get("success"); | ||
if (success == null) { | ||
success = new SimpleOrderedMap(); | ||
results.add("success", success); | ||
} | ||
success.addAll((NamedList) addResult.get("success")); | ||
} | ||
}); | ||
} | ||
|
||
boolean allIsDone = countDownLatch.await(1, TimeUnit.HOURS); | ||
if (!allIsDone) { | ||
throw new TimeoutException("Initial replicas were not created within 10 minutes. Timing out."); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Outdated error message: the wait above is now 1 hour, not 10 minutes. |
||
} | ||
Object failures = results.get("failure"); | ||
if (failures != null && ((SimpleOrderedMap) failures).size() > 0) { | ||
log.error("Restore failed to create initial replicas."); | ||
ocmh.cleanupCollection(restoreCollectionName, new NamedList()); | ||
return; | ||
} | ||
|
||
//refresh the location copy of collection state | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<?xml version="1.0" encoding="UTF-8" ?> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe there is a way to avoid creating this new configset? Either use the Config API or the Configset API, or maybe just update one file. Or maybe set a system property that one of the existing configsets uses to something invalid? |
||
<!-- | ||
Licensed to the Apache Software Foundation (ASF) under one or more | ||
contributor license agreements. See the NOTICE file distributed with | ||
this work for additional information regarding copyright ownership. | ||
The ASF licenses this file to You under the Apache License, Version 2.0 | ||
(the "License"); you may not use this file except in compliance with | ||
the License. You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
--> | ||
<schema name="minimal" version="1.1"> | ||
<fieldType name="string" class="solr.StrField"/> | ||
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> | ||
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> | ||
<dynamicField name="*" type="string" indexed="true" stored="true"/> | ||
<!-- for versioning --> | ||
<field name="_version_" type="long" indexed="true" stored="true"/> | ||
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/> | ||
<field name="id" type="string" indexed="true" stored="true"/> | ||
<dynamicField name="*_s" type="string" indexed="true" stored="true" /> | ||
<uniqueKey>id</uniqueKey> | ||
</schema> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
<?xml version="1.0" ?> | ||
|
||
<!-- | ||
Licensed to the Apache Software Foundation (ASF) under one or more | ||
contributor license agreements. See the NOTICE file distributed with | ||
this work for additional information regarding copyright ownership. | ||
The ASF licenses this file to You under the Apache License, Version 2.0 | ||
(the "License"); you may not use this file except in compliance with | ||
the License. You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
--> | ||
|
||
<!-- Minimal solrconfig.xml with /select, /admin and /update only --> | ||
|
||
<config> | ||
|
||
<dataDir>${solr.data.dir:}</dataDir> | ||
|
||
<directoryFactory name="DirectoryFactory" | ||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> | ||
<schemaFactory class="ClassicIndexSchemaFactory"/> | ||
|
||
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> | ||
<updateHandler class="solr.DirectUpdateHandler2"> | ||
<commitWithin> | ||
<softCommit>${solr.commitwithin.softcommit:true}</softCommit> | ||
</commitWithin> | ||
<updateLog class="${solr.ulog:solr.UpdateLog}"></updateLog> | ||
</updateHandler> | ||
|
||
<requestHandler name="/select" class="solr.SearchHandler"> | ||
<lst name="defaults"> | ||
<str name="echoParams">explicit</str> | ||
<str name="indent">true</str> | ||
<str name="df">text</str> | ||
</lst> | ||
|
||
</requestHandler> | ||
|
||
<requestHandler name="/nope" class="solr.NonExistinghHandler"> | ||
</requestHandler> | ||
</config> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should move this to the end of the callback, or there can be a race condition