Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyException;
Expand Down Expand Up @@ -136,7 +137,7 @@ public SubClusterId getHomeSubcluster(

if (appSubmissionContext == null) {
throw new FederationPolicyException(
"The ApplicationSubmissionContext " + "cannot be null.");
"The ApplicationSubmissionContext cannot be null.");
}

String queue = appSubmissionContext.getQueue();
Expand All @@ -148,51 +149,7 @@ public SubClusterId getHomeSubcluster(
queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
}

// the facade might cache this request, based on its parameterization
SubClusterPolicyConfiguration configuration = null;

try {
configuration = federationFacade.getPolicyConfiguration(queue);
} catch (YarnException e) {
String errMsg = "There is no policy configured for the queue: " + queue
+ ", falling back to defaults.";
LOG.warn(errMsg, e);
}

// If there is no policy configured for this queue, fallback to the baseline
// policy that is configured either in the store or via XML config (and
// cached)
if (configuration == null) {
LOG.warn("There is no policies configured for queue: " + queue + " we"
+ " fallback to default policy for: "
+ YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY);

queue = YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY;
try {
configuration = federationFacade.getPolicyConfiguration(queue);
} catch (YarnException e) {
String errMsg = "Cannot retrieve policy configured for the queue: "
+ queue + ", falling back to defaults.";
LOG.warn(errMsg, e);

}
}

// the fallback is not configure via store, but via XML, using
// previously loaded configuration.
if (configuration == null) {
configuration =
cachedConfs.get(YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY);
}

// if the configuration has changed since last loaded, reinit the policy
// based on current configuration
if (!cachedConfs.containsKey(queue)
|| !cachedConfs.get(queue).equals(configuration)) {
singlePolicyReinit(policyMap, cachedConfs, queue, configuration);
}

FederationRouterPolicy policy = policyMap.get(queue);
FederationRouterPolicy policy = getFederationRouterPolicy(cachedConfs, policyMap, queue);
if (policy == null) {
// this should never happen, as the to maps are updated together
throw new FederationPolicyException("No FederationRouterPolicy found "
Expand Down Expand Up @@ -262,4 +219,79 @@ public synchronized void reset() {

}

/**
 * Routes a reservation submission to a home sub-cluster, delegating the
 * choice to the router policy resolved for the queue named in the request.
 *
 * @param request the reservation submission to route; must not be null
 * @return the sub-cluster chosen by the resolved router policy
 * @throws YarnException if the request is null, if no router policy can be
 *           resolved for the queue, or if the policy fails to choose a
 *           sub-cluster
 */
public SubClusterId getReservationHomeSubCluster(
    ReservationSubmissionRequest request) throws YarnException {

  // the maps are concurrent, but we need to protect from reset()
  // reinitialization mid-execution by creating a new reference local to this
  // method.
  Map<String, SubClusterPolicyConfiguration> cachedConfs = globalConfMap;
  Map<String, FederationRouterPolicy> policyMap = globalPolicyMap;

  if (request == null) {
    throw new FederationPolicyException(
        "The ReservationSubmissionRequest cannot be null.");
  }

  // A request that names no queue is routed with the default queue, the same
  // default the application routing path falls back to. This also keeps a
  // null key away from the policy lookup and the concurrent maps behind it.
  String queue = request.getQueue();
  if (queue == null) {
    queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
  }

  FederationRouterPolicy policy = getFederationRouterPolicy(cachedConfs, policyMap, queue);

  if (policy == null) {
    // this should never happen, as the two maps are updated together
    throw new FederationPolicyException("No FederationRouterPolicy found "
        + "for queue: " + queue + " (while routing "
        + "reservation: " + request.getReservationId() + ") "
        + "and no default specified.");
  }

  return policy.getReservationHomeSubcluster(request);
}

/**
 * Resolves the router policy to use for a queue, re-initializing the cached
 * policy first when its configuration differs from the one just resolved.
 *
 * <p>The configuration is looked up in this order: the facade entry for the
 * queue itself, the facade entry for the default policy key, and finally the
 * configuration already cached under the default policy key.
 *
 * @param cachedConfiguration policy configurations currently loaded, keyed
 *          by queue
 * @param policyMap router policies currently instantiated, keyed by queue
 * @param queue the queue whose policy is requested
 * @return the policy stored under the resolved key, or null if none is
 *         present
 * @throws FederationPolicyInitializationException propagated from the policy
 *           re-initialization step
 */
private FederationRouterPolicy getFederationRouterPolicy(
    Map<String, SubClusterPolicyConfiguration> cachedConfiguration,
    Map<String, FederationRouterPolicy> policyMap, String queue)
    throws FederationPolicyInitializationException {

  final String defaultKey = YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY;

  // key under which the policy is ultimately cached; starts as the queue and
  // switches to the default key when the queue has no configuration
  String lookupKey = queue;
  SubClusterPolicyConfiguration resolved = null;

  // first attempt: the queue's own configuration (the facade might cache it)
  try {
    resolved = federationFacade.getPolicyConfiguration(lookupKey);
  } catch (YarnException e) {
    LOG.warn("There is no policy configured for the queue: {}, " +
        "falling back to defaults.", lookupKey, e);
  }

  // second attempt: the baseline policy registered under the default key
  if (resolved == null) {
    LOG.warn("There is no policies configured for queue: {} " +
        "we fallback to default policy for: {}. ", lookupKey, defaultKey);
    lookupKey = defaultKey;
    try {
      resolved = federationFacade.getPolicyConfiguration(lookupKey);
    } catch (YarnException e) {
      LOG.warn("Cannot retrieve policy configured for the queue: {}, " +
          "falling back to defaults.", lookupKey, e);
    }
  }

  // last resort: the default configuration that was loaded previously
  if (resolved == null) {
    resolved = cachedConfiguration.get(defaultKey);
  }

  // rebuild the policy unless the cache already holds this exact
  // configuration for the resolved key
  boolean cacheIsCurrent = cachedConfiguration.containsKey(lookupKey)
      && cachedConfiguration.get(lookupKey).equals(resolved);
  if (!cacheIsCurrent) {
    singlePolicyReinit(policyMap, cachedConfiguration, lookupKey, resolved);
  }

  return policyMap.get(lookupKey);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@

package org.apache.hadoop.yarn.server.federation.policies.router;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Collections;

import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.AbstractConfigurableFederationPolicy;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyUtils;
import org.apache.hadoop.yarn.server.federation.policies.dao.WeightedPolicyInfo;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyException;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyInitializationException;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterIdInfo;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;

/**
* Base abstract class for {@link FederationRouterPolicy} implementations, that
Expand Down Expand Up @@ -63,4 +72,70 @@ public void validate(ApplicationSubmissionContext appSubmissionContext)
}
}

/**
* Picks one sub-cluster out of the pre-selected candidates. Concrete router
* policies implement their selection strategy here; this class calls it from
* both the application and the reservation routing methods once the
* candidate set has been computed.
*
* @param queue the queue name taken from the application or reservation
*          being routed
* @param preSelectSubClusters the candidate sub-clusters to choose from
* @return the chosen sub-cluster
* @throws YarnException declared for implementations that cannot make a
*           choice (exact conditions depend on the implementation)
*/
protected abstract SubClusterId chooseSubCluster(String queue,
Map<SubClusterId, SubClusterInfo> preSelectSubClusters) throws YarnException;

/**
 * Narrows the candidate sub-clusters according to an optional reservation.
 *
 * <p>Without a reservation the active sub-clusters are returned as given.
 * With a reservation the result is an immutable single-entry map holding
 * only the reservation's home sub-cluster; its value is whatever
 * {@code activeSubClusters} maps that sub-cluster to, which is null when
 * the home sub-cluster is not among the active ones.
 *
 * @param reservationId the reservation to honour, or null if there is none
 * @param activeSubClusters the currently active sub-clusters
 * @return the sub-clusters that remain eligible
 * @throws YarnException if the reservation's home sub-cluster cannot be
 *           looked up (for example, an unknown reservation); callers are
 *           expected to handle this
 */
protected Map<SubClusterId, SubClusterInfo> prefilterSubClusters(
    ReservationId reservationId, Map<SubClusterId, SubClusterInfo> activeSubClusters)
    throws YarnException {

  // no reservation: nothing to restrict
  if (reservationId == null) {
    return activeSubClusters;
  }

  // a reservation pins the scope to the sub-cluster it is mapped to; the
  // lookup may throw YarnException for an unknown reservation, which is
  // left for the policy invoker to handle
  SubClusterId home = getPolicyContext()
      .getFederationStateStoreFacade()
      .getReservationHomeSubCluster(reservationId);
  SubClusterInfo homeInfo = activeSubClusters.get(home);

  return Collections.singletonMap(home, homeInfo);
}

@Override
public SubClusterId getHomeSubcluster(ApplicationSubmissionContext appContext,
List<SubClusterId> blackLists) throws YarnException {

// null checks and default-queue behavior
validate(appContext);

// apply filtering based on reservation location and active sub-clusters
Map<SubClusterId, SubClusterInfo> filteredSubClusters = prefilterSubClusters(
appContext.getReservationID(), getActiveSubclusters());

FederationPolicyUtils.validateSubClusterAvailability(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we have a method that takes both arguments separate?

new ArrayList<>(filteredSubClusters.keySet()), blackLists);

// remove black SubCluster
if (blackLists != null) {
blackLists.forEach(filteredSubClusters::remove);
}

// pick the chosen subCluster from the active ones
return chooseSubCluster(appContext.getQueue(), filteredSubClusters);
}


@Override
public SubClusterId getReservationHomeSubcluster(ReservationSubmissionRequest request)
throws YarnException {
if (request == null) {
throw new FederationPolicyException(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One line.

"The ReservationSubmissionRequest cannot be null.");
}

if (request.getQueue() == null) {
request.setQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
}

// apply filtering based on reservation location and active sub-clusters
Map<SubClusterId, SubClusterInfo> filteredSubClusters = getActiveSubclusters();

// pick the chosen subCluster from the active ones
return chooseSubCluster(request.getQueue(), filteredSubClusters);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.util.List;

import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.ConfigurableFederationPolicy;
Expand Down Expand Up @@ -49,4 +50,16 @@ public interface FederationRouterPolicy extends ConfigurableFederationPolicy {
SubClusterId getHomeSubcluster(
ApplicationSubmissionContext appSubmissionContext,
List<SubClusterId> blackListSubClusters) throws YarnException;

/**
* Determines the sub-cluster where a ReservationSubmissionRequest should be
* sent to.
*
* @param request the original reservation submission request
* @return the {@link SubClusterId} of the sub-cluster chosen as home for
*         the reservation
*
* @throws YarnException if the policy fails to choose a sub-cluster
*/
SubClusterId getReservationHomeSubcluster(
ReservationSubmissionRequest request) throws YarnException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@
import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyInitializationContext;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyInitializationContextValidator;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyUtils;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyInitializationException;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
Expand All @@ -50,53 +48,12 @@ public void reinitialize(
setPolicyContext(federationPolicyContext);
}

/**
* Simply picks from alphabetically-sorted active subclusters based on the
* hash of queue name. Jobs of the same queue will all be routed to the same
* sub-cluster, as far as the number of active sub-cluster and their names
* remain the same.
*
* @param appSubmissionContext the {@link ApplicationSubmissionContext} that
* has to be routed to an appropriate subCluster for execution.
*
* @param blackListSubClusters the list of subClusters as identified by
* {@link SubClusterId} to blackList from the selection of the home
* subCluster.
*
* @return a hash-based chosen {@link SubClusterId} that will be the "home"
* for this application.
*
* @throws YarnException if there are no active subclusters.
*/
@Override
public SubClusterId getHomeSubcluster(
ApplicationSubmissionContext appSubmissionContext,
List<SubClusterId> blackListSubClusters) throws YarnException {

// throws if no active subclusters available
Map<SubClusterId, SubClusterInfo> activeSubclusters =
getActiveSubclusters();

FederationPolicyUtils.validateSubClusterAvailability(
new ArrayList<SubClusterId>(activeSubclusters.keySet()),
blackListSubClusters);

if (blackListSubClusters != null) {

// Remove from the active SubClusters from StateStore the blacklisted ones
for (SubClusterId scId : blackListSubClusters) {
activeSubclusters.remove(scId);
}
}

validate(appSubmissionContext);

int chosenPosition = Math.abs(
appSubmissionContext.getQueue().hashCode() % activeSubclusters.size());

List<SubClusterId> list = new ArrayList<>(activeSubclusters.keySet());
protected SubClusterId chooseSubCluster(String queue,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have a javadoc in the parent?

Map<SubClusterId, SubClusterInfo> preSelectSubClusters) throws YarnException {
int chosenPosition = Math.abs(queue.hashCode() % preSelectSubClusters.size());
List<SubClusterId> list = new ArrayList<>(preSelectSubClusters.keySet());
Collections.sort(list);
return list.get(chosenPosition);
}

}
Loading