Skip to content

Commit 3060fb9

Browse files
committed
HBASE-27215 Add support for sync replication (#4762)
Signed-off-by: Xiaolin Ha <haxiaolin@apache.org>
1 parent 081b865 commit 3060fb9

File tree

3 files changed

+28
-30
lines changed

3 files changed

+28
-30
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ private void uncaughtException(Thread t, Throwable e, ReplicationSourceManager m
462462
t.getName());
463463
manager.refreshSources(peerId);
464464
break;
465-
} catch (IOException e1) {
465+
} catch (IOException | ReplicationException e1) {
466466
LOG.error("Replication sources refresh failed.", e1);
467467
sleepForRetries("Sleeping before try refreshing sources again", maxRetriesMultiplier);
468468
}

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -404,38 +404,44 @@ public void drainSources(String peerId) throws IOException, ReplicationException
404404
// TODO: use empty initial offsets for now, revisit when adding support for sync replication
405405
ReplicationSourceInterface src =
406406
createSource(new ReplicationQueueData(queueId, ImmutableMap.of()), peer);
407-
// synchronized here to avoid race with preLogRoll where we add new log to source and also
407+
// synchronized here to avoid race with postLogRoll where we add new log to source and also
408408
// walsById.
409409
ReplicationSourceInterface toRemove;
410-
Map<String, NavigableSet<String>> wals = new HashMap<>();
410+
ReplicationQueueData queueData;
411411
synchronized (latestPaths) {
412+
// Here we record the last remaining wal file of each group as an offset and then write it to the
413+
// replication queue storage after releasing the lock. It is not safe to just remove the old
414+
// map from walsById since later we may fail to update the replication queue storage, and when
415+
// we retry next time, we cannot know the wal files that need to be set to the replication
416+
// queue storage
417+
ImmutableMap.Builder<String, ReplicationGroupOffset> builder = ImmutableMap.builder();
418+
synchronized (walsById) {
419+
walsById.get(queueId).forEach((group, wals) -> {
420+
if (!wals.isEmpty()) {
421+
builder.put(group, new ReplicationGroupOffset(wals.last(), -1));
422+
}
423+
});
424+
}
425+
queueData = new ReplicationQueueData(queueId, builder.build());
426+
src = createSource(queueData, peer);
412427
toRemove = sources.put(peerId, src);
413428
if (toRemove != null) {
414429
LOG.info("Terminate replication source for " + toRemove.getPeerId());
415430
toRemove.terminate(terminateMessage);
416431
toRemove.getSourceMetrics().clear();
417432
}
418-
// Here we make a copy of all the remaining wal files and then delete them from the
419-
// replication queue storage after releasing the lock. It is not safe to just remove the old
420-
// map from walsById since later we may fail to delete them from the replication queue
421-
// storage, and when we retry next time, we can not know the wal files that need to be deleted
422-
// from the replication queue storage.
423-
walsById.get(queueId).forEach((k, v) -> wals.put(k, new TreeSet<>(v)));
433+
}
434+
for (Map.Entry<String, ReplicationGroupOffset> entry : queueData.getOffsets().entrySet()) {
435+
queueStorage.setOffset(queueId, entry.getKey(), entry.getValue(), Collections.emptyMap());
424436
}
425437
LOG.info("Startup replication source for " + src.getPeerId());
426438
src.startup();
427-
for (NavigableSet<String> walsByGroup : wals.values()) {
428-
// TODO: just need to reset the replication offset
429-
// for (String wal : walsByGroup) {
430-
// queueStorage.removeWAL(server.getServerName(), peerId, wal);
431-
// }
432-
}
433439
synchronized (walsById) {
434-
Map<String, NavigableSet<String>> oldWals = walsById.get(queueId);
435-
wals.forEach((k, v) -> {
436-
NavigableSet<String> walsByGroup = oldWals.get(k);
440+
Map<String, NavigableSet<String>> wals = walsById.get(queueId);
441+
queueData.getOffsets().forEach((group, offset) -> {
442+
NavigableSet<String> walsByGroup = wals.get(group);
437443
if (walsByGroup != null) {
438-
walsByGroup.removeAll(v);
444+
walsByGroup.headSet(offset.getWal(), true).clear();
439445
}
440446
});
441447
}
@@ -458,13 +464,8 @@ public void drainSources(String peerId) throws IOException, ReplicationException
458464
}
459465

460466
private ReplicationSourceInterface createRefreshedSource(ReplicationQueueId queueId,
461-
ReplicationPeer peer) throws IOException {
462-
Map<String, ReplicationGroupOffset> offsets;
463-
try {
464-
offsets = queueStorage.getOffsets(queueId);
465-
} catch (ReplicationException e) {
466-
throw new IOException(e);
467-
}
467+
ReplicationPeer peer) throws IOException, ReplicationException {
468+
Map<String, ReplicationGroupOffset> offsets = queueStorage.getOffsets(queueId);
468469
return createSource(new ReplicationQueueData(queueId, ImmutableMap.copyOf(offsets)), peer);
469470
}
470471

@@ -474,7 +475,7 @@ private ReplicationSourceInterface createRefreshedSource(ReplicationQueueId queu
474475
* replication queue storage and only to enqueue all logs to the new replication source
475476
* @param peerId the id of the replication peer
476477
*/
477-
public void refreshSources(String peerId) throws IOException {
478+
public void refreshSources(String peerId) throws ReplicationException, IOException {
478479
String terminateMessage = "Peer " + peerId
479480
+ " state or config changed. Will close the previous replication source and open a new one";
480481
ReplicationPeer peer = replicationPeers.getPeer(peerId);

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestDrainReplicationQueuesForStandBy.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,9 @@
3535
import org.apache.hadoop.hbase.util.Bytes;
3636
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
3737
import org.junit.ClassRule;
38-
import org.junit.Ignore;
3938
import org.junit.Test;
4039
import org.junit.experimental.categories.Category;
4140

42-
// TODO: revisit later
43-
@Ignore
4441
@Category({ ReplicationTests.class, MediumTests.class })
4542
public class TestDrainReplicationQueuesForStandBy extends SyncReplicationTestBase {
4643

0 commit comments

Comments
 (0)