Skip to content

Commit b608121

Browse files
committed
HBASE-23275: Track active master's address in ActiveMasterManager (apache#812)
Currently we only track whether an active master exists. It helps to also track the active master's address in every master, so that any of them can answer client RPC requests asking which master is active. Signed-off-by: Nick Dimiduk <ndimiduk@apache.org> Signed-off-by: Andrew Purtell <apurtell@apache.org> (cherry picked from commit efebb84) (cherry picked from commit 7429491)
1 parent 4ad0e78 commit b608121

File tree

3 files changed

+61
-6
lines changed

3 files changed

+61
-6
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
*
33
* Licensed to the Apache Software Foundation (ASF) under one
44
* or more contributor license agreements. See the NOTICE file
@@ -17,7 +17,6 @@
1717
* limitations under the License.
1818
*/
1919
package org.apache.hadoop.hbase.master;
20-
2120
import java.io.IOException;
2221
import java.util.concurrent.atomic.AtomicBoolean;
2322

@@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZooKeeperListener {
5756
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
5857
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
5958

59+
// This server's information.
6060
private final ServerName sn;
6161
private int infoPort;
6262
private final Server master;
6363

64+
// Active master's server name. Invalidated anytime active master changes (based on ZK
65+
// notifications) and lazily fetched on-demand.
66+
// ServerName is immutable, so we don't need heavy synchronization around it.
67+
private volatile ServerName activeMasterServerName;
68+
6469
/**
65-
* @param watcher
70+
* @param watcher ZK watcher
6671
* @param sn ServerName
6772
* @param master An instance of a Master.
6873
*/
@@ -106,6 +111,33 @@ void handle(final String path) {
106111
}
107112
}
108113

114+
/**
115+
* Fetches the active master's ServerName from zookeeper.
116+
*/
117+
private void fetchAndSetActiveMasterServerName() {
118+
LOG.debug("Attempting to fetch active master sn from zk");
119+
try {
120+
activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher);
121+
} catch (IOException | KeeperException e) {
122+
// Log and ignore for now and re-fetch later if needed.
123+
LOG.error("Error fetching active master information", e);
124+
}
125+
}
126+
127+
/**
128+
* @return the currently active master as seen by us or null if one does not exist.
129+
*/
130+
public ServerName getActiveMasterServerName() {
131+
if (!clusterHasActiveMaster.get()) {
132+
return null;
133+
}
134+
if (activeMasterServerName == null) {
135+
fetchAndSetActiveMasterServerName();
136+
}
137+
// It could still be null, but return whatever we have.
138+
return activeMasterServerName;
139+
}
140+
109141
/**
110142
* Handle a change in the master node. Doesn't matter whether this was called
111143
* from a nodeCreated or nodeDeleted event because there are no guarantees
@@ -134,6 +166,9 @@ private void handleMasterNodeChange() {
134166
// Notify any thread waiting to become the active master
135167
clusterHasActiveMaster.notifyAll();
136168
}
169+
// Reset the active master sn. Will be re-fetched later if needed.
170+
// We don't want to make a synchronous RPC under a monitor.
171+
activeMasterServerName = null;
137172
}
138173
} catch (KeeperException ke) {
139174
master.abort("Received an unexpected KeeperException, aborting", ke);
@@ -151,8 +186,8 @@ private void handleMasterNodeChange() {
151186
* @param checkInterval the interval to check if the master is stopped
152187
* @param startupStatus the monitor status to track the progress
153188
* @return True if no issue becoming active master else false if another
154-
* master was running or if some other problem (zookeeper, stop flag has been
155-
* set on this Master)
189+
* master was running or if some other problem (zookeeper, stop flag has been
190+
* set on this Master)
156191
*/
157192
boolean blockUntilBecomingActiveMaster(
158193
int checkInterval, MonitoredTask startupStatus) {
@@ -179,9 +214,13 @@ boolean blockUntilBecomingActiveMaster(
179214
startupStatus.setStatus("Successfully registered as active master.");
180215
this.clusterHasActiveMaster.set(true);
181216
LOG.info("Registered Active Master=" + this.sn);
217+
activeMasterServerName = sn;
182218
return true;
183219
}
184220

221+
// Invalidate the active master name so that subsequent requests do not get any stale
222+
// master information. Will be re-fetched if needed.
223+
activeMasterServerName = null;
185224
// There is another active master running elsewhere or this is a restart
186225
// and the master ephemeral node has not expired yet.
187226
this.clusterHasActiveMaster.set(true);
@@ -208,7 +247,8 @@ boolean blockUntilBecomingActiveMaster(
208247
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
209248

210249
// We may have failed to delete the znode at the previous step, but
211-
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
250+
// we delete the file anyway: a second attempt to delete the znode is likely to fail
251+
// again.
212252
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
213253
} else {
214254
msg = "Another master is the active master, " + currentMaster +

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3434,6 +3434,10 @@ public LoadBalancer getLoadBalancer() {
34343434
return replicationLoadSourceMap;
34353435
}
34363436

3437+
public ServerName getActiveMaster() {
3438+
return activeMasterManager.getActiveMasterServerName();
3439+
}
3440+
34373441
public String getClusterId() {
34383442
if (activeMaster) {
34393443
return super.getClusterId();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
*/
1919
package org.apache.hadoop.hbase.master;
2020

21+
import static org.junit.Assert.assertEquals;
2122
import static org.junit.Assert.assertFalse;
2223
import static org.junit.Assert.assertNotNull;
24+
import static org.junit.Assert.assertNull;
2325
import static org.junit.Assert.assertTrue;
2426

2527
import java.io.IOException;
@@ -84,6 +86,7 @@ public static void tearDownAfterClass() throws Exception {
8486
ActiveMasterManager activeMasterManager =
8587
dummyMaster.getActiveMasterManager();
8688
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
89+
assertNull(activeMasterManager.getActiveMasterServerName());
8790

8891
// First test becoming the active master uninterrupted
8992
MonitoredTask status = Mockito.mock(MonitoredTask.class);
@@ -92,6 +95,7 @@ public static void tearDownAfterClass() throws Exception {
9295
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
9396
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
9497
assertMaster(zk, master);
98+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
9599

96100
// Now pretend master restart
97101
DummyMaster secondDummyMaster = new DummyMaster(zk,master);
@@ -101,6 +105,8 @@ public static void tearDownAfterClass() throws Exception {
101105
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
102106
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
103107
assertMaster(zk, master);
108+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
109+
assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName());
104110
}
105111

106112
/**
@@ -128,6 +134,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
128134
ActiveMasterManager activeMasterManager =
129135
ms1.getActiveMasterManager();
130136
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
137+
assertNull(activeMasterManager.getActiveMasterServerName());
131138

132139
// First test becoming the active master uninterrupted
133140
ClusterStatusTracker clusterStatusTracker =
@@ -137,6 +144,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
137144
Mockito.mock(MonitoredTask.class));
138145
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
139146
assertMaster(zk, firstMasterAddress);
147+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
140148

141149
// New manager will now try to become the active master in another thread
142150
WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
@@ -154,6 +162,8 @@ public void testActiveMasterManagerFromZK() throws Exception {
154162
assertTrue(t.manager.clusterHasActiveMaster.get());
155163
// But secondary one should not be the active master
156164
assertFalse(t.isActiveMaster);
165+
// Verify the active master ServerName is populated in standby master.
166+
assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName());
157167

158168
// Close the first server and delete its master node
159169
ms1.stop("stopping first server");
@@ -181,6 +191,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
181191

182192
assertTrue(t.manager.clusterHasActiveMaster.get());
183193
assertTrue(t.isActiveMaster);
194+
assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName());
184195

185196
LOG.info("Deleting master node");
186197

0 commit comments

Comments
 (0)