Skip to content

Commit e7688fa

Browse files
authored
Wait for a backoff period when rereplication fails (apache#2967)
* Reduce CPU usage: when another bookie restarts, rereplicate fails with BookieIdNotResolvedException, so wait for a backoff period whenever rereplicate fails * Clean up error code Co-authored-by: lushiji <lushiji@didiglobal.com>
1 parent 3f30d22 commit e7688fa

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,12 @@ public void run() {
227227
workerRunning = true;
228228
while (workerRunning) {
229229
try {
230-
rereplicate();
230+
if (!rereplicate()) {
231+
LOG.warn("failed while replicating fragments");
232+
waitBackOffTime(rwRereplicateBackoffMs);
233+
}
231234
} catch (InterruptedException e) {
232-
LOG.info("InterruptedException "
235+
LOG.error("InterruptedException "
233236
+ "while replicating fragments", e);
234237
shutdown();
235238
Thread.currentThread().interrupt();
@@ -258,7 +261,7 @@ private static void waitBackOffTime(long backoffMs) {
258261
* Replicates the under replicated fragments from failed bookie ledger to
259262
* targetBookie.
260263
*/
261-
private void rereplicate() throws InterruptedException, BKException,
264+
private boolean rereplicate() throws InterruptedException, BKException,
262265
UnavailableException {
263266
long ledgerIdToReplicate = underreplicationManager
264267
.getLedgerToRereplicate();
@@ -275,6 +278,7 @@ private void rereplicate() throws InterruptedException, BKException,
275278
rereplicateOpStats.registerFailedEvent(latencyMillis, TimeUnit.MILLISECONDS);
276279
}
277280
}
281+
return success;
278282
}
279283

280284
private void logBKExceptionAndReleaseLedger(BKException e, long ledgerIdToReplicate)

0 commit comments

Comments
 (0)