Skip to content

Commit f61b1e3

Browse files
committed
HBASE-23275: Track active master's address in ActiveMasterManager
Currently we only track whether an active master exists. It helps to also track the active master's address on every master, so that any master can answer client RPC requests asking which master is active.
1 parent 08aae42 commit f61b1e3

File tree

3 files changed

+53
-0
lines changed

3 files changed

+53
-0
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.io.IOException;
2222
import java.util.concurrent.atomic.AtomicBoolean;
23+
import java.util.concurrent.atomic.AtomicReference;
2324

2425
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
2526
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -57,10 +58,16 @@ public class ActiveMasterManager extends ZKListener {
5758
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
5859
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
5960

61+
// This server's information.
6062
private final ServerName sn;
6163
private int infoPort;
6264
private final Server master;
6365

66+
// Active master's server name. Non-null if cluster has an active master unless there
67+
// is an issue fetching the active master's server name.
68+
// ServerName is immutable, so we don't need heavy synchronization around it.
69+
private final AtomicReference<ServerName> activeMasterServerName;
70+
6471
/**
6572
* @param watcher
6673
* @param sn ServerName
@@ -71,6 +78,7 @@ public class ActiveMasterManager extends ZKListener {
7178
watcher.registerListener(this);
7279
this.sn = sn;
7380
this.master = master;
81+
activeMasterServerName = new AtomicReference<>();
7482
}
7583

7684
// will be set after jetty server is started
@@ -106,6 +114,31 @@ void handle(final String path) {
106114
}
107115
}
108116

117+
/*
118+
* Fetches the active master's ServerName from zookeeper.
119+
*/
120+
private void fetchAndSetActiveMasterServerName() {
121+
LOG.debug("Attempting to fetch active master sn from zk");
122+
try {
123+
activeMasterServerName.set(MasterAddressTracker.getMasterAddress(watcher));
124+
} catch (IOException | KeeperException e) {
125+
// Log and ignore for now and re-fetch later if needed.
126+
LOG.error("Error fetching active master information", e);
127+
}
128+
}
129+
130+
public ServerName getActiveMasterServerName() {
131+
if (!clusterHasActiveMaster.get()) return null;
132+
ServerName sname = activeMasterServerName.get();
133+
if (sname != null) {
134+
return sname;
135+
}
136+
// This happens if the data was not fetched earlier for some reason.
137+
fetchAndSetActiveMasterServerName();
138+
// It could still be null, but return whatever we have.
139+
return activeMasterServerName.get();
140+
}
141+
109142
/**
110143
* Handle a change in the master node. Doesn't matter whether this was called
111144
* from a nodeCreated or nodeDeleted event because there are no guarantees
@@ -134,6 +167,9 @@ private void handleMasterNodeChange() {
134167
// Notify any thread waiting to become the active master
135168
clusterHasActiveMaster.notifyAll();
136169
}
170+
// Reset the active master sn. Will be re-fetched later if needed.
171+
// We don't want to make a synchronous RPC under a monitor.
172+
activeMasterServerName.set(null);
137173
}
138174
} catch (KeeperException ke) {
139175
master.abort("Received an unexpected KeeperException, aborting", ke);
@@ -178,10 +214,14 @@ boolean blockUntilBecomingActiveMaster(
178214
// We are the master, return
179215
startupStatus.setStatus("Successfully registered as active master.");
180216
this.clusterHasActiveMaster.set(true);
217+
activeMasterServerName.set(sn);
181218
LOG.info("Registered as active master=" + this.sn);
182219
return true;
183220
}
184221

222+
// Invalidate the active master name so that subsequent requests do not get any stale
223+
// master information. Will be re-fetched if needed.
224+
activeMasterServerName.set(null);
185225
// There is another active master running elsewhere or this is a restart
186226
// and the master ephemeral node has not expired yet.
187227
this.clusterHasActiveMaster.set(true);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3869,4 +3869,7 @@ public String getClusterId() {
38693869
return cachedClusterId.getFromCacheOrFetch();
38703870
}
38713871

3872+
public ServerName getActiveMaster() {
3873+
return activeMasterManager.getActiveMasterServerName();
3874+
}
38723875
}

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hbase.master;
1919

20+
import static org.junit.Assert.assertEquals;
2021
import static org.junit.Assert.assertFalse;
2122
import static org.junit.Assert.assertNotNull;
2223
import static org.junit.Assert.assertTrue;
@@ -91,6 +92,7 @@ public static void tearDownAfterClass() throws Exception {
9192
ActiveMasterManager activeMasterManager =
9293
dummyMaster.getActiveMasterManager();
9394
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
95+
assertEquals(null, activeMasterManager.getActiveMasterServerName());
9496

9597
// First test becoming the active master uninterrupted
9698
MonitoredTask status = Mockito.mock(MonitoredTask.class);
@@ -99,6 +101,7 @@ public static void tearDownAfterClass() throws Exception {
99101
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
100102
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
101103
assertMaster(zk, master);
104+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
102105

103106
// Now pretend master restart
104107
DummyMaster secondDummyMaster = new DummyMaster(zk,master);
@@ -108,6 +111,8 @@ public static void tearDownAfterClass() throws Exception {
108111
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
109112
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
110113
assertMaster(zk, master);
114+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
115+
assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName());
111116
}
112117

113118
/**
@@ -135,6 +140,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
135140
ActiveMasterManager activeMasterManager =
136141
ms1.getActiveMasterManager();
137142
assertFalse(activeMasterManager.clusterHasActiveMaster.get());
143+
assertEquals(activeMasterManager.getActiveMasterServerName(), null);
138144

139145
// First test becoming the active master uninterrupted
140146
ClusterStatusTracker clusterStatusTracker =
@@ -144,6 +150,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
144150
Mockito.mock(MonitoredTask.class));
145151
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
146152
assertMaster(zk, firstMasterAddress);
153+
assertMaster(zk, activeMasterManager.getActiveMasterServerName());
147154

148155
// New manager will now try to become the active master in another thread
149156
WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
@@ -161,6 +168,8 @@ public void testActiveMasterManagerFromZK() throws Exception {
161168
assertTrue(t.manager.clusterHasActiveMaster.get());
162169
// But secondary one should not be the active master
163170
assertFalse(t.isActiveMaster);
171+
// Verify the active master ServerName is populated in standby master.
172+
assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName());
164173

165174
// Close the first server and delete its master node
166175
ms1.stop("stopping first server");
@@ -189,6 +198,7 @@ public void testActiveMasterManagerFromZK() throws Exception {
189198

190199
assertTrue(t.manager.clusterHasActiveMaster.get());
191200
assertTrue(t.isActiveMaster);
201+
assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName());
192202

193203
LOG.info("Deleting master node");
194204

0 commit comments

Comments
 (0)