Commit 1057da8

HBASE-26768 Avoid unnecessary replication suspending in RegionReplica… (#4127)
1 parent: 1dd29db

3 files changed: +359 −28 lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 2 additions & 2 deletions
@@ -587,7 +587,7 @@ public Result getResult() {
   }
 
   /** A result object from prepare flush cache stage */
-  static class PrepareFlushResult {
+  protected static class PrepareFlushResult {
     final FlushResultImpl result; // indicating a failure result from prepare
     final TreeMap<byte[], StoreFlushContext> storeFlushCtxs;
     final TreeMap<byte[], List<Path>> committedFiles;
@@ -729,7 +729,7 @@ void sawNoSuchFamily() {
 
   private final StoreHotnessProtector storeHotnessProtector;
 
-  private Optional<RegionReplicationSink> regionReplicationSink = Optional.empty();
+  protected Optional<RegionReplicationSink> regionReplicationSink = Optional.empty();
 
   /**
    * HRegion constructor. This constructor should only be used for testing and
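
Both HRegion.java changes are visibility-only: PrepareFlushResult goes from package-private to protected, and regionReplicationSink from private to protected, so subclasses (typically test doubles) can reach them. A minimal, self-contained sketch of that pattern, using illustrative names rather than the real HBase classes:

  import java.util.Optional;

  // Illustrative sketch only (hypothetical names, not HBase classes):
  // widening a field from private to protected lets a test subclass
  // observe internal state without reflection.
  class Production {
    protected Optional<String> sink = Optional.empty(); // was: private

    void attach(String s) {
      sink = Optional.of(s);
    }
  }

  class ObservableProduction extends Production {
    Optional<String> sinkForAssertions() {
      return sink; // accessible because the field is now protected
    }
  }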

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/regionreplication/RegionReplicationSink.java

Lines changed: 35 additions & 26 deletions
@@ -17,11 +17,11 @@
  */
 package org.apache.hadoop.hbase.regionserver.regionreplication;
 
+import com.google.errorprone.annotations.RestrictedApi;
 import java.io.IOException;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -204,7 +204,7 @@ public RegionReplicationSink(Configuration conf, RegionInfo primary, TableDescri
     this.failedReplicas = new IntHashSet(regionReplication - 1);
   }
 
-  private void onComplete(List<SinkEntry> sent,
+  void onComplete(List<SinkEntry> sent,
     Map<Integer, MutableObject<Throwable>> replica2Error) {
     long maxSequenceId = Long.MIN_VALUE;
     long toReleaseSize = 0;
@@ -214,31 +214,32 @@ private void onComplete(List<SinkEntry> sent,
       toReleaseSize += entry.size;
     }
     manager.decrease(toReleaseSize);
-    Set<Integer> failed = new HashSet<>();
-    for (Map.Entry<Integer, MutableObject<Throwable>> entry : replica2Error.entrySet()) {
-      Integer replicaId = entry.getKey();
-      Throwable error = entry.getValue().getValue();
-      if (error != null) {
-        if (maxSequenceId > lastFlushedSequenceId) {
-          LOG.warn(
-            "Failed to replicate to secondary replica {} for {}, since the max sequence" +
-              " id of sunk entris is {}, which is greater than the last flush SN {}," +
-              " we will stop replicating for a while and trigger a flush",
-            replicaId, primary, maxSequenceId, lastFlushedSequenceId, error);
-          failed.add(replicaId);
-        } else {
-          LOG.warn(
-            "Failed to replicate to secondary replica {} for {}, since the max sequence" +
-              " id of sunk entris is {}, which is less than or equal to the last flush SN {}," +
-              " we will not stop replicating",
-            replicaId, primary, maxSequenceId, lastFlushedSequenceId, error);
-        }
-      }
-    }
     synchronized (entries) {
       pendingSize -= toReleaseSize;
-      if (!failed.isEmpty()) {
-        failedReplicas.addAll(failed);
+      boolean addFailedReplicas = false;
+      for (Map.Entry<Integer, MutableObject<Throwable>> entry : replica2Error.entrySet()) {
+        Integer replicaId = entry.getKey();
+        Throwable error = entry.getValue().getValue();
+        if (error != null) {
+          if (maxSequenceId > lastFlushedSequenceId) {
+            LOG.warn(
+              "Failed to replicate to secondary replica {} for {}, since the max sequence"
+                + " id of sunk entris is {}, which is greater than the last flush SN {},"
+                + " we will stop replicating for a while and trigger a flush",
+              replicaId, primary, maxSequenceId, lastFlushedSequenceId, error);
+            failedReplicas.add(replicaId);
+            addFailedReplicas = true;
+          } else {
+            LOG.warn(
+              "Failed to replicate to secondary replica {} for {}, since the max sequence"
+                + " id of sunk entris is {}, which is less than or equal to the last flush SN {},"
+                + " we will not stop replicating",
+              replicaId, primary, maxSequenceId, lastFlushedSequenceId, error);
+          }
+        }
+      }
+
+      if (addFailedReplicas) {
         flushRequester.requestFlush(maxSequenceId);
       }
       sending = false;
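
This hunk is the heart of the change: the error scan moves inside synchronized (entries), so lastFlushedSequenceId is read and failedReplicas is mutated under the same lock, and a flush is requested only when this call actually marked a new replica as failed. Checking lastFlushedSequenceId under the lock closes the window in which a concurrent flush could advance the flushed sequence id right after the check, which appears to be the "unnecessary replication suspending" the commit title refers to. A condensed, self-contained sketch of that control flow, using a hypothetical stand-in class rather than the real RegionReplicationSink:

  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.Map;
  import java.util.Set;

  // Condensed sketch of the reworked flow (illustrative names, not the actual
  // HBase types): the failure scan runs while holding the lock that guards both
  // failedReplicas and lastFlushedSequenceId, and a flush is requested only
  // when this call newly suspended a replica.
  class SinkSketch {
    private final Object entries = new Object();
    private final Set<Integer> failedReplicas = new HashSet<>();
    private long lastFlushedSequenceId;

    void onComplete(Map<Integer, Throwable> replica2Error, long maxSequenceId) {
      synchronized (entries) {
        boolean addFailedReplicas = false;
        for (Map.Entry<Integer, Throwable> e : replica2Error.entrySet()) {
          // Suspend a replica only if the failed edits outrun the last flush.
          if (e.getValue() != null && maxSequenceId > lastFlushedSequenceId) {
            failedReplicas.add(e.getKey());
            addFailedReplicas = true;
          }
        }
        if (addFailedReplicas) {
          requestFlush(maxSequenceId); // skipped when nothing was newly suspended
        }
      }
    }

    private void requestFlush(long upToSeqId) {
      System.out.println("flush requested up to " + upToSeqId);
    }
  }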
@@ -323,7 +324,7 @@ private boolean isStartFlushAllStores(FlushDescriptor flushDesc) {
     return storesFlushed.containsAll(tableDesc.getColumnFamilyNames());
   }
 
-  private Optional<FlushDescriptor> getStartFlushAllDescriptor(Cell metaCell) {
+  Optional<FlushDescriptor> getStartFlushAllDescriptor(Cell metaCell) {
     if (!CellUtil.matchingFamily(metaCell, WALEdit.METAFAMILY)) {
       return Optional.empty();
     }
@@ -448,4 +449,12 @@ public void waitUntilStopped() throws InterruptedException {
       }
     }
   }
+
+  @RestrictedApi(explanation = "Should only be called in tests", link = "",
+    allowedOnPath = ".*/src/test/.*")
+  IntHashSet getFailedReplicas() {
+    synchronized (entries) {
+      return this.failedReplicas;
+    }
+  }
 }
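
The new getFailedReplicas accessor stays package-private and is guarded by Error Prone's @RestrictedApi annotation (hence the new import above), which rejects at compile time any caller whose source path does not match allowedOnPath, limiting the accessor to code under src/test. A minimal sketch of the same guard on a hypothetical class, using the annotation elements (explanation, link, allowedOnPath) exactly as this diff does:

  import com.google.errorprone.annotations.RestrictedApi;
  import java.util.HashSet;
  import java.util.Set;

  // Minimal sketch (hypothetical class): the RestrictedApi check makes any
  // call site outside the allowedOnPath pattern a compile-time error, so
  // internal state stays test-only without becoming public API.
  class GuardedState {
    private final Set<Integer> failed = new HashSet<>();

    @RestrictedApi(explanation = "Should only be called in tests", link = "",
        allowedOnPath = ".*/src/test/.*")
    Set<Integer> failedForTesting() {
      return failed;
    }
  }

Note that the restriction is enforced by the Error Prone compiler plugin; plain javac compiles such calls without complaint, so the guard only holds in builds that run Error Prone.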
