Skip to content

Commit 60e263e

Browse files
zhengzhuobinzzbApache9
authored and committed
HBASE-26482 HMaster may clean wals that is replicating in rare cases (#3876)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent e4ad6e9 commit 60e263e

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationQueueStorage.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ public long getWALPosition(ServerName serverName, String queueId, String fileNam
379379
return 0;
380380
}
381381

382+
/**
383+
* This implementation must update the cversion of root {@link #queuesZNode}. The optimistic lock of
384+
* the {@link #getAllWALs()} method is based on the cversion of root {@link #queuesZNode}.
385+
* @see #getAllWALs() for how the cversion of root {@link #queuesZNode} is used.
386+
*/
382387
@Override
383388
public Pair<String, SortedSet<String>> claimQueue(ServerName sourceServerName, String queueId,
384389
ServerName destServerName) throws ReplicationException {
@@ -417,6 +422,12 @@ public Pair<String, SortedSet<String>> claimQueue(ServerName sourceServerName, S
417422
}
418423
// add delete op for peer
419424
listOfOps.add(ZKUtilOp.deleteNodeFailSilent(oldQueueNode));
425+
// Append the new queue id to prevent lock contention in the ZooKeeper server.
426+
String claimLockZNode = ZNodePaths.joinZNode(queuesZNode, "cversion_" + newQueueId);
427+
// A trick to update the cversion of the root queuesZNode.
428+
// The optimistic lock of the getAllWALs() method is based on the cversion of root queuesZNode
429+
listOfOps.add(ZKUtilOp.createAndFailSilent(claimLockZNode, HConstants.EMPTY_BYTE_ARRAY));
430+
listOfOps.add(ZKUtilOp.deleteNodeFailSilent(claimLockZNode));
420431

421432
LOG.trace("The multi list size is {}", listOfOps.size());
422433
ZKUtil.multiOrSequential(zookeeper, listOfOps, false);
@@ -505,6 +516,13 @@ protected int getQueuesZNodeCversion() throws KeeperException {
505516
return stat.getCversion();
506517
}
507518

519+
/**
520+
* The optimistic lock of this implementation is based on the cversion of root {@link #queuesZNode}.
521+
* Therefore, we must update the cversion of root {@link #queuesZNode} when migrating WAL nodes to
522+
* other queues.
523+
* @see #claimQueue(ServerName, String, ServerName) as an example of updating root
524+
* {@link #queuesZNode} cversion.
525+
*/
508526
@Override
509527
public Set<String> getAllWALs() throws ReplicationException {
510528
try {

hbase-replication/src/test/java/org/apache/hadoop/hbase/replication/TestZKReplicationQueueStorage.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,18 +206,29 @@ public void testAddRemoveLog() throws ReplicationException {
206206
}
207207
}
208208

209-
// For HBASE-12865
209+
// For HBASE-12865, HBASE-26482
210210
@Test
211211
public void testClaimQueueChangeCversion() throws ReplicationException, KeeperException {
212212
ServerName serverName1 = ServerName.valueOf("127.0.0.1", 8000, 10000);
213213
STORAGE.addWAL(serverName1, "1", "file");
214+
STORAGE.addWAL(serverName1, "2", "file");
214215

215-
int v0 = STORAGE.getQueuesZNodeCversion();
216216
ServerName serverName2 = ServerName.valueOf("127.0.0.1", 8001, 10001);
217+
// Pre-create server2's rsNode so that claimQueue does not update the cversion by creating it.
218+
STORAGE.addWAL(serverName2, "1", "file");
219+
STORAGE.addWAL(serverName2, "2", "file");
220+
221+
int v0 = STORAGE.getQueuesZNodeCversion();
222+
217223
STORAGE.claimQueue(serverName1, "1", serverName2);
218224
int v1 = STORAGE.getQueuesZNodeCversion();
219-
// cversion should increase by 1 since a child node is deleted
220-
assertEquals(1, v1 - v0);
225+
// cversion should be increased by claimQueue method.
226+
assertTrue(v1 > v0);
227+
228+
STORAGE.claimQueue(serverName1, "2", serverName2);
229+
int v2 = STORAGE.getQueuesZNodeCversion();
230+
// cversion should be increased by claimQueue method.
231+
assertTrue(v2 > v1);
221232
}
222233

223234
private ZKReplicationQueueStorage createWithUnstableVersion() throws IOException {

0 commit comments

Comments
 (0)