Skip to content

Commit c25977d

Browse files
authored
Merge branch 'apache:trunk' into YARN-7707
2 parents 44ea2cd + 3f4de91 commit c25977d

File tree

34 files changed

+42455
-28617
lines changed

34 files changed

+42455
-28617
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,6 +3075,10 @@ public static boolean isAclEnabled(Configuration conf) {
30753075
+ "amrmproxy.enabled";
30763076
public static final boolean DEFAULT_AMRM_PROXY_ENABLED = false;
30773077

3078+
public static final String AMRM_PROXY_WAIT_UAM_REGISTER_DONE =
3079+
NM_PREFIX + "amrmproxy.wait.uam-register.done";
3080+
public static final boolean DEFAULT_AMRM_PROXY_WAIT_UAM_REGISTER_DONE = false;
3081+
30783082
public static final String AMRM_PROXY_ADDRESS = NM_PREFIX
30793083
+ "amrmproxy.address";
30803084
public static final int DEFAULT_AMRM_PROXY_PORT = 8049;

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5354,6 +5354,16 @@
53545354
<value></value>
53555355
</property>
53565356

5357+
<property>
5358+
<description>
5359+
Whether to wait for UAM registration to complete.
5360+
The default value is false. If we set it to true,
5361+
the UAM needs to be registered before attempting to allocate a container.
5362+
</description>
5363+
<name>yarn.nodemanager.amrmproxy.wait.uam-register.done</name>
5364+
<value>false</value>
5365+
</property>
5366+
53575367
<property>
53585368
<description>
53595369
YARN Federation supports Non-HA mode.

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/utils/FederationStateStoreFacade.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ public boolean existsApplicationHomeSubCluster(ApplicationId applicationId) {
819819
return true;
820820
}
821821
} catch (YarnException e) {
822-
LOG.warn("get homeSubCluster by applicationId = {} error.", applicationId, e);
822+
LOG.debug("get homeSubCluster by applicationId = {} error.", applicationId, e);
823823
}
824824
return false;
825825
}
@@ -906,7 +906,7 @@ public boolean existsReservationHomeSubCluster(ReservationId reservationId) {
906906
return true;
907907
}
908908
} catch (YarnException e) {
909-
LOG.warn("get homeSubCluster by reservationId = {} error.", reservationId, e);
909+
LOG.debug("get homeSubCluster by reservationId = {} error.", reservationId, e);
910910
}
911911
return false;
912912
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.yarn.server;
2020

21+
import java.io.Closeable;
2122
import java.io.IOException;
2223
import java.net.ConnectException;
2324
import java.util.ArrayList;
@@ -183,7 +184,7 @@
183184
* change the implementation with care.
184185
*/
185186
public class MockResourceManagerFacade implements ApplicationClientProtocol,
186-
ApplicationMasterProtocol, ResourceManagerAdministrationProtocol {
187+
ApplicationMasterProtocol, ResourceManagerAdministrationProtocol, Closeable {
187188

188189
private static final Logger LOG =
189190
LoggerFactory.getLogger(MockResourceManagerFacade.class);
@@ -967,4 +968,9 @@ public DeregisterSubClusterResponse deregisterSubCluster(DeregisterSubClusterReq
967968
public HashMap<ApplicationId, List<ContainerId>> getApplicationContainerIdMap() {
968969
return applicationContainerIdMap;
969970
}
971+
972+
@Override
973+
public void close() throws IOException {
974+
LOG.info("MockResourceManagerFacade Close.");
975+
}
970976
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/FederationInterceptor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,8 @@ public class FederationInterceptor extends AbstractRequestInterceptor {
251251
// the maximum wait time for the first async heart beat response
252252
private long heartbeatMaxWaitTimeMs;
253253

254+
private boolean waitUamRegisterDone;
255+
254256
private MonotonicClock clock = new MonotonicClock();
255257

256258
/**
@@ -353,6 +355,8 @@ public void init(AMRMProxyApplicationContext appContext) {
353355
this.subClusterTimeOut =
354356
YarnConfiguration.DEFAULT_FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT;
355357
}
358+
this.waitUamRegisterDone = conf.getBoolean(YarnConfiguration.AMRM_PROXY_WAIT_UAM_REGISTER_DONE,
359+
YarnConfiguration.DEFAULT_AMRM_PROXY_WAIT_UAM_REGISTER_DONE);
356360
}
357361

358362
@Override
@@ -1332,6 +1336,18 @@ public void run() {
13321336
});
13331337
this.uamRegisterFutures.put(scId, future);
13341338
}
1339+
1340+
if (this.waitUamRegisterDone) {
1341+
for (Map.Entry<SubClusterId, Future<?>> entry : this.uamRegisterFutures.entrySet()) {
1342+
SubClusterId subClusterId = entry.getKey();
1343+
Future<?> future = entry.getValue();
1344+
while (!future.isDone()) {
1345+
LOG.info("subClusterId {} Wait Uam Register done.", subClusterId);
1346+
}
1347+
}
1348+
}
1349+
1350+
13351351
return newSubClusters;
13361352
}
13371353

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/TestFederationInterceptor.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.apache.hadoop.registry.client.api.RegistryOperations;
3939
import org.apache.hadoop.registry.client.impl.FSRegistryOperationsService;
4040
import org.apache.hadoop.security.UserGroupInformation;
41+
import org.apache.hadoop.test.GenericTestUtils;
4142
import org.apache.hadoop.test.LambdaTestUtils;
4243
import org.apache.hadoop.util.Time;
4344
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
@@ -178,6 +179,9 @@ protected YarnConfiguration createConfiguration() {
178179
conf.setLong(YarnConfiguration.FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT,
179180
500);
180181

182+
// Wait for UAM registration to be done
183+
conf.setBoolean(YarnConfiguration.AMRM_PROXY_WAIT_UAM_REGISTER_DONE, true);
184+
181185
return conf;
182186
}
183187

@@ -593,6 +597,10 @@ public Object run() throws Exception {
593597
interceptor.recover(recoveredDataMap);
594598

595599
Assert.assertEquals(1, interceptor.getUnmanagedAMPoolSize());
600+
601+
// Wait for SC1 to time out.
602+
GenericTestUtils.waitFor(() -> interceptor.getTimedOutSCs(true).size() == 1, 100, 1000);
603+
596604
// SC1 should be initialized to be timed out
597605
Assert.assertEquals(1, interceptor.getTimedOutSCs(true).size());
598606

@@ -851,7 +859,7 @@ public Object run() throws Exception {
851859
List<Container> containers =
852860
getContainersAndAssert(numberOfContainers, numberOfContainers * 2);
853861
for (Container c : containers) {
854-
LOG.info("Allocated container " + c.getId());
862+
LOG.info("Allocated container {}", c.getId());
855863
}
856864
Assert.assertEquals(1, interceptor.getUnmanagedAMPoolSize());
857865

@@ -885,6 +893,10 @@ public Object run() throws Exception {
885893
int numberOfContainers = 3;
886894
// Should re-attach secondaries and get the three running containers
887895
Assert.assertEquals(1, interceptor.getUnmanagedAMPoolSize());
896+
897+
// Wait for SC1 to time out.
898+
GenericTestUtils.waitFor(() -> interceptor.getTimedOutSCs(true).size() == 1, 100, 1000);
899+
888900
// SC1 should be initialized to be timed out
889901
Assert.assertEquals(1, interceptor.getTimedOutSCs(true).size());
890902
Assert.assertEquals(numberOfContainers,

0 commit comments

Comments
 (0)