Skip to content

Commit cdf53a5

Browse files
committed
HBASE-28850 Only return from ReplicationSink.replicationEntries while all background tasks are finished (#6263)
Signed-off-by: Andrew Purtell <apurtell@apache.org> (cherry picked from commit 52082bc)
1 parent 192f640 commit cdf53a5

File tree

1 file changed

+19
-3
lines changed

1 file changed

+19
-3
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSink.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,17 +509,33 @@ private void batch(TableName tableName, Collection<List<Row>> allRows, int batch
509509
}
510510
futures.addAll(batchRows.stream().map(table::batchAll).collect(Collectors.toList()));
511511
}
512-
512+
// Here we will always wait until all futures are finished, even if there are failures when
513+
// getting from a future in the middle. This is because this method may be called in an RPC call,
514+
// so the batch operations may reference some off-heap cells (through CellScanner). If we return
515+
// earlier here, the RPC call may finish and its off-heap cells may be released before
516+
// some of the batch operations finish, and then cause corrupt data or even crash the region
517+
// server. See HBASE-28584 and HBASE-28850 for more details.
518+
IOException error = null;
513519
for (Future<?> future : futures) {
514520
try {
515521
FutureUtils.get(future);
516522
} catch (RetriesExhaustedException e) {
523+
IOException ioe;
517524
if (e.getCause() instanceof TableNotFoundException) {
518-
throw new TableNotFoundException("'" + tableName + "'");
525+
ioe = new TableNotFoundException("'" + tableName + "'");
526+
} else {
527+
ioe = e;
528+
}
529+
if (error == null) {
530+
error = ioe;
531+
} else {
532+
error.addSuppressed(ioe);
519533
}
520-
throw e;
521534
}
522535
}
536+
if (error != null) {
537+
throw error;
538+
}
523539
}
524540

525541
/**

0 commit comments

Comments
 (0)