YARN-11005. Implement the core QUEUE_LENGTH_THEN_RESOURCES OContainer allocation policy (#3717)

This commit is contained in:
Andrew Chung 2021-12-08 13:02:10 -05:00 committed by GitHub
parent 66a08e933a
commit ffee92bbf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 982 additions and 105 deletions

View File

@ -404,18 +404,51 @@ public boolean isAllInvalidDivisor(Resource r) {
@Override @Override
public float ratio(Resource a, Resource b) { public float ratio(Resource a, Resource b) {
float ratio = 0.0f; return ratio(a, b, true);
}
/**
* Computes the ratio of resource a over resource b,
* where the boolean flag {@literal isDominantShare} allows
* specification of whether the max- or min-share should be computed.
* @param a the numerator resource.
* @param b the denominator resource.
* @param isDominantShare whether the dominant (max) share should be computed,
* computes the min-share if false.
* @return the max- or min-share ratio of the resources.
*/
private float ratio(Resource a, Resource b, boolean isDominantShare) {
float ratio = isDominantShare ? 0.0f : 1.0f;
int maxLength = ResourceUtils.getNumberOfCountableResourceTypes(); int maxLength = ResourceUtils.getNumberOfCountableResourceTypes();
for (int i = 0; i < maxLength; i++) { for (int i = 0; i < maxLength; i++) {
ResourceInformation aResourceInformation = a.getResourceInformation(i); ResourceInformation aResourceInformation = a.getResourceInformation(i);
ResourceInformation bResourceInformation = b.getResourceInformation(i); ResourceInformation bResourceInformation = b.getResourceInformation(i);
final float tmp = divideSafelyAsFloat(aResourceInformation.getValue(), final float tmp = divideSafelyAsFloat(aResourceInformation.getValue(),
bResourceInformation.getValue()); bResourceInformation.getValue());
ratio = ratio > tmp ? ratio : tmp; if (isDominantShare) {
ratio = Math.max(ratio, tmp);
} else {
ratio = Math.min(ratio, tmp);
}
} }
return ratio; return ratio;
} }
/**
* Computes the ratio of resource a over resource b.
* However, different from ratio(Resource, Resource),
* this returns the min-share of the resources.
* For example, ratio(Resource(10, 50), Resource(100, 100)) would return 0.5,
* whereas minRatio(Resource(10, 50), Resource(100, 100)) would return 0.1.
* @param a the numerator resource.
* @param b the denominator resource.
* @return the min-share ratio of the resources.
*/
@Unstable
public float minRatio(Resource a, Resource b) {
return ratio(a, b, false);
}
@Override @Override
public Resource divideAndCeil(Resource numerator, int denominator) { public Resource divideAndCeil(Resource numerator, int denominator) {
return divideAndCeil(numerator, (long) denominator); return divideAndCeil(numerator, (long) denominator);

View File

@ -257,7 +257,9 @@ public OpportunisticContainerAllocatorAMService(RMContext rmContext,
int limitMin, limitMax; int limitMin, limitMax;
if (comparator == NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH) { if (comparator == NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH ||
comparator ==
NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH_THEN_RESOURCES) {
limitMin = rmContext.getYarnConfiguration() limitMin = rmContext.getYarnConfiguration()
.getInt(YarnConfiguration.NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH, .getInt(YarnConfiguration.NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH,
YarnConfiguration. YarnConfiguration.

View File

@ -251,14 +251,15 @@ private List<Container> allocateNodeLocal(
String userName, Map<Resource, List<Allocation>> allocations) String userName, Map<Resource, List<Allocation>> allocations)
throws YarnException { throws YarnException {
List<Container> allocatedContainers = new ArrayList<>(); List<Container> allocatedContainers = new ArrayList<>();
final ResourceRequest resourceRequest = enrichedAsk.getRequest();
while (toAllocate > 0) { while (toAllocate > 0) {
RMNode node = nodeQueueLoadMonitor.selectLocalNode(nodeLocation, RMNode node = nodeQueueLoadMonitor.selectLocalNode(nodeLocation,
blacklist); blacklist, resourceRequest.getCapability());
if (node != null) { if (node != null) {
toAllocate--; toAllocate--;
Container container = createContainer(rmIdentifier, appParams, Container container = createContainer(rmIdentifier, appParams,
idCounter, id, userName, allocations, nodeLocation, idCounter, id, userName, allocations, nodeLocation,
enrichedAsk.getRequest(), convertToRemoteNode(node)); resourceRequest, convertToRemoteNode(node));
allocatedContainers.add(container); allocatedContainers.add(container);
LOG.info("Allocated [{}] as opportunistic at location [{}]", LOG.info("Allocated [{}] as opportunistic at location [{}]",
container.getId(), nodeLocation); container.getId(), nodeLocation);
@ -280,14 +281,15 @@ private List<Container> allocateRackLocal(EnrichedResourceRequest enrichedAsk,
String userName, Map<Resource, List<Allocation>> allocations) String userName, Map<Resource, List<Allocation>> allocations)
throws YarnException { throws YarnException {
List<Container> allocatedContainers = new ArrayList<>(); List<Container> allocatedContainers = new ArrayList<>();
final ResourceRequest resourceRequest = enrichedAsk.getRequest();
while (toAllocate > 0) { while (toAllocate > 0) {
RMNode node = nodeQueueLoadMonitor.selectRackLocalNode(rackLocation, RMNode node = nodeQueueLoadMonitor.selectRackLocalNode(rackLocation,
blacklist); blacklist, resourceRequest.getCapability());
if (node != null) { if (node != null) {
toAllocate--; toAllocate--;
Container container = createContainer(rmIdentifier, appParams, Container container = createContainer(rmIdentifier, appParams,
idCounter, id, userName, allocations, rackLocation, idCounter, id, userName, allocations, rackLocation,
enrichedAsk.getRequest(), convertToRemoteNode(node)); resourceRequest, convertToRemoteNode(node));
allocatedContainers.add(container); allocatedContainers.add(container);
metrics.incrRackLocalOppContainers(); metrics.incrRackLocalOppContainers();
LOG.info("Allocated [{}] as opportunistic at location [{}]", LOG.info("Allocated [{}] as opportunistic at location [{}]",
@ -309,13 +311,15 @@ private List<Container> allocateAny(EnrichedResourceRequest enrichedAsk,
String userName, Map<Resource, List<Allocation>> allocations) String userName, Map<Resource, List<Allocation>> allocations)
throws YarnException { throws YarnException {
List<Container> allocatedContainers = new ArrayList<>(); List<Container> allocatedContainers = new ArrayList<>();
final ResourceRequest resourceRequest = enrichedAsk.getRequest();
while (toAllocate > 0) { while (toAllocate > 0) {
RMNode node = nodeQueueLoadMonitor.selectAnyNode(blacklist); RMNode node = nodeQueueLoadMonitor.selectAnyNode(
blacklist, resourceRequest.getCapability());
if (node != null) { if (node != null) {
toAllocate--; toAllocate--;
Container container = createContainer(rmIdentifier, appParams, Container container = createContainer(rmIdentifier, appParams,
idCounter, id, userName, allocations, ResourceRequest.ANY, idCounter, id, userName, allocations, ResourceRequest.ANY,
enrichedAsk.getRequest(), convertToRemoteNode(node)); resourceRequest, convertToRemoteNode(node));
allocatedContainers.add(container); allocatedContainers.add(container);
metrics.incrOffSwitchOppContainers(); metrics.incrOffSwitchOppContainers();
LOG.info("Allocated [{}] as opportunistic at location [{}]", LOG.info("Allocated [{}] as opportunistic at location [{}]",

View File

@ -20,74 +20,243 @@
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
/** /**
* Represents a node in the cluster from the NodeQueueLoadMonitor's perspective * Represents a node in the cluster from the NodeQueueLoadMonitor's perspective
*/ */
public class ClusterNode { public class ClusterNode {
private final AtomicInteger queueLength = new AtomicInteger(0); /**
private final AtomicInteger queueWaitTime = new AtomicInteger(-1); * Properties class used to initialize/change fields in ClusterNode.
*/
public static final class Properties {
private int queueLength = 0;
private int queueWaitTime = -1;
private long timestamp; private long timestamp;
final NodeId nodeId;
private int queueCapacity = 0; private int queueCapacity = 0;
private boolean queueCapacityIsSet = false;
private final HashSet<String> labels; private final HashSet<String> labels;
private Resource capability = null;
private Resource allocatedResource = null;
public ClusterNode(NodeId nodeId) { public static Properties newInstance() {
this.nodeId = nodeId; return new Properties();
this.labels = new HashSet<>();
updateTimestamp();
} }
public ClusterNode setQueueLength(int qLength) { Properties setQueueLength(int qLength) {
this.queueLength.set(qLength); this.queueLength = qLength;
return this; return this;
} }
public ClusterNode setQueueWaitTime(int wTime) { Properties setQueueWaitTime(int wTime) {
this.queueWaitTime.set(wTime); this.queueWaitTime = wTime;
return this; return this;
} }
public ClusterNode updateTimestamp() { Properties updateTimestamp() {
this.timestamp = System.currentTimeMillis(); this.timestamp = System.currentTimeMillis();
return this; return this;
} }
public ClusterNode setQueueCapacity(int capacity) { Properties setQueueCapacity(int capacity) {
this.queueCapacity = capacity; this.queueCapacity = capacity;
this.queueCapacityIsSet = true;
return this; return this;
} }
public ClusterNode setNodeLabels(Collection<String> labelsToAdd) { Properties setNodeLabels(Collection<String> labelsToAdd) {
labels.clear(); labels.clear();
labels.addAll(labelsToAdd); labels.addAll(labelsToAdd);
return this; return this;
} }
Properties setCapability(Resource nodeCapability) {
this.capability = nodeCapability;
return this;
}
Properties setAllocatedResource(Resource allocResource) {
this.allocatedResource = allocResource;
return this;
}
private Properties() {
labels = new HashSet<>();
}
}
private int queueLength = 0;
private int queueWaitTime = -1;
private long timestamp;
final NodeId nodeId;
private int queueCapacity = 0;
private final HashSet<String> labels;
private Resource capability = Resources.none();
private Resource allocatedResource = Resources.none();
private final ReentrantReadWriteLock.WriteLock writeLock;
private final ReentrantReadWriteLock.ReadLock readLock;
public ClusterNode(NodeId nodeId) {
this.nodeId = nodeId;
this.labels = new HashSet<>();
final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
this.writeLock = lock.writeLock();
this.readLock = lock.readLock();
this.timestamp = System.currentTimeMillis();
}
public ClusterNode setProperties(final Properties properties) {
writeLock.lock();
try {
if (properties.capability == null) {
this.capability = Resources.none();
} else {
this.capability = properties.capability;
}
if (properties.allocatedResource == null) {
this.allocatedResource = Resources.none();
} else {
this.allocatedResource = properties.allocatedResource;
}
this.queueLength = properties.queueLength;
this.queueWaitTime = properties.queueWaitTime;
this.timestamp = properties.timestamp;
if (properties.queueCapacityIsSet) {
// queue capacity is only set on node add, not on node updates
this.queueCapacity = properties.queueCapacity;
}
this.labels.clear();
this.labels.addAll(properties.labels);
return this;
} finally {
writeLock.unlock();
}
}
public Resource getAllocatedResource() {
readLock.lock();
try {
return this.allocatedResource;
} finally {
readLock.unlock();
}
}
public Resource getAvailableResource() {
readLock.lock();
try {
return Resources.subtractNonNegative(capability, allocatedResource);
} finally {
readLock.unlock();
}
}
public Resource getCapability() {
readLock.lock();
try {
return this.capability;
} finally {
readLock.unlock();
}
}
public boolean hasLabel(String label) { public boolean hasLabel(String label) {
readLock.lock();
try {
return this.labels.contains(label); return this.labels.contains(label);
} finally {
readLock.unlock();
}
} }
public long getTimestamp() { public long getTimestamp() {
readLock.lock();
try {
return this.timestamp; return this.timestamp;
} finally {
readLock.unlock();
}
} }
public AtomicInteger getQueueLength() { public int getQueueLength() {
readLock.lock();
try {
return this.queueLength; return this.queueLength;
} finally {
readLock.unlock();
}
} }
public AtomicInteger getQueueWaitTime() { public int getQueueWaitTime() {
readLock.lock();
try {
return this.queueWaitTime; return this.queueWaitTime;
} finally {
readLock.unlock();
}
} }
public int getQueueCapacity() { public int getQueueCapacity() {
readLock.lock();
try {
return this.queueCapacity; return this.queueCapacity;
} finally {
readLock.unlock();
}
}
public boolean compareAndIncrementAllocation(
final int incrementQLen,
final ResourceCalculator resourceCalculator,
final Resource requested) {
writeLock.lock();
try {
final Resource currAvailable = Resources.subtractNonNegative(
capability, allocatedResource);
if (resourceCalculator.fitsIn(requested, currAvailable)) {
allocatedResource = Resources.add(allocatedResource, requested);
return true;
}
if (!resourceCalculator.fitsIn(requested, capability)) {
// If does not fit at all, do not allocate
return false;
}
return compareAndIncrementAllocation(incrementQLen);
} finally {
writeLock.unlock();
}
}
public boolean compareAndIncrementAllocation(final int incrementQLen) {
writeLock.lock();
try {
final int added = queueLength + incrementQLen;
if (added <= queueCapacity) {
queueLength = added;
return true;
}
return false;
} finally {
writeLock.unlock();
}
} }
public boolean isQueueFull() { public boolean isQueueFull() {
readLock.lock();
try {
return this.queueCapacity > 0 && return this.queueCapacity > 0 &&
this.queueLength.get() >= this.queueCapacity; this.queueLength >= this.queueCapacity;
} finally {
readLock.unlock();
}
} }
} }

View File

@ -18,8 +18,13 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed; package org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed;
import org.apache.commons.math3.util.Precision;
import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
@ -64,39 +69,165 @@ public class NodeQueueLoadMonitor implements ClusterMonitor {
* of two Nodes are compared. * of two Nodes are compared.
*/ */
public enum LoadComparator implements Comparator<ClusterNode> { public enum LoadComparator implements Comparator<ClusterNode> {
/**
* This policy only considers queue length.
* When allocating, increments queue length without looking at resources
* available on the node, and when sorting, also only sorts by queue length.
*/
QUEUE_LENGTH, QUEUE_LENGTH,
QUEUE_WAIT_TIME; /**
* This policy only considers the wait time of containers in the queue.
* Neither looks at resources nor at queue length.
*/
QUEUE_WAIT_TIME,
/**
* This policy considers both queue length and resources.
* When allocating, first decrements resources available on a node.
* If resources are available, does not place OContainers on the node queue.
* When sorting, it first sorts by queue length,
* then by available resources.
*/
QUEUE_LENGTH_THEN_RESOURCES;
private Resource clusterResource = Resources.none();
private final DominantResourceCalculator resourceCalculator =
new DominantResourceCalculator();
private boolean shouldPerformMinRatioComputation() {
if (clusterResource == null) {
return false;
}
return !resourceCalculator.isAnyMajorResourceZeroOrNegative(
clusterResource);
}
/**
* Compares queue length of nodes first (shortest first),
* then compares available resources normalized
* over cluster resources (most available resources first).
* @param o1 the first ClusterNode
* @param o2 the second ClusterNode
* @return the difference the two ClusterNodes for sorting
*/
private int compareQueueLengthThenResources(
final ClusterNode o1, final ClusterNode o2) {
int diff = o1.getQueueLength() - o2.getQueueLength();
if (diff != 0) {
return diff;
}
final Resource availableResource1 = o1.getAvailableResource();
final Resource availableResource2 = o2.getAvailableResource();
// Cluster resource should be valid before performing min-ratio logic
// Use raw available resource comparison otherwise
if (shouldPerformMinRatioComputation()) {
// Takes the least available resource of the two nodes,
// normalized to the overall cluster resource
final float availableRatio1 =
resourceCalculator.minRatio(availableResource1, clusterResource);
final float availableRatio2 =
resourceCalculator.minRatio(availableResource2, clusterResource);
// The one with more available resources should be placed first
diff = Precision.compareTo(
availableRatio2, availableRatio1, Precision.EPSILON);
}
if (diff == 0) {
// Compare absolute value if ratios are the same
diff = availableResource2.getVirtualCores() - availableResource1.getVirtualCores();
}
if (diff == 0) {
diff = Long.compare(availableResource2.getMemorySize(),
availableResource1.getMemorySize());
}
return diff;
}
@Override @Override
public int compare(ClusterNode o1, ClusterNode o2) { public int compare(ClusterNode o1, ClusterNode o2) {
if (getMetric(o1) == getMetric(o2)) { int diff;
switch (this) {
case QUEUE_LENGTH_THEN_RESOURCES:
diff = compareQueueLengthThenResources(o1, o2);
break;
case QUEUE_WAIT_TIME:
case QUEUE_LENGTH:
default:
diff = getMetric(o1) - getMetric(o2);
break;
}
if (diff == 0) {
return (int) (o2.getTimestamp() - o1.getTimestamp()); return (int) (o2.getTimestamp() - o1.getTimestamp());
} }
return getMetric(o1) - getMetric(o2); return diff;
}
@VisibleForTesting
void setClusterResource(Resource clusterResource) {
this.clusterResource = clusterResource;
}
public ResourceCalculator getResourceCalculator() {
return resourceCalculator;
} }
public int getMetric(ClusterNode c) { public int getMetric(ClusterNode c) {
return (this == QUEUE_LENGTH) ? switch (this) {
c.getQueueLength().get() : c.getQueueWaitTime().get(); case QUEUE_WAIT_TIME:
return c.getQueueWaitTime();
case QUEUE_LENGTH:
case QUEUE_LENGTH_THEN_RESOURCES:
default:
return c.getQueueLength();
}
} }
/** /**
* Increment the metric by a delta if it is below the threshold. * Increment the metric by a delta if it is below the threshold.
* @param c ClusterNode * @param c ClusterNode
* @param incrementSize increment size * @param incrementSize increment size
* @param requested the requested resource
* @return true if the metric was below threshold and was incremented. * @return true if the metric was below threshold and was incremented.
*/ */
public boolean compareAndIncrement(ClusterNode c, int incrementSize) { public boolean compareAndIncrement(
if(this == QUEUE_LENGTH) { ClusterNode c, int incrementSize, Resource requested) {
int ret = c.getQueueLength().addAndGet(incrementSize); switch (this) {
if (ret <= c.getQueueCapacity()) { case QUEUE_LENGTH_THEN_RESOURCES:
return true; return c.compareAndIncrementAllocation(
} incrementSize, resourceCalculator, requested);
c.getQueueLength().addAndGet(-incrementSize); case QUEUE_WAIT_TIME:
return false;
}
// for queue wait time, we don't have any threshold. // for queue wait time, we don't have any threshold.
return true; return true;
case QUEUE_LENGTH:
default:
return c.compareAndIncrementAllocation(incrementSize);
}
}
/**
* Whether we should be placing OContainers on a node.
* @param cn the clusterNode
* @return whether we should be placing OContainers on a node.
*/
public boolean isNodeAvailable(final ClusterNode cn) {
int queueCapacity = cn.getQueueCapacity();
int queueLength = cn.getQueueLength();
if (this == LoadComparator.QUEUE_LENGTH_THEN_RESOURCES) {
if (queueCapacity <= 0) {
return queueLength <= 0;
} else {
return queueLength < queueCapacity;
}
}
// In the special case where queueCapacity is 0 for the node,
// the container can be allocated on the node but will be rejected there
return queueCapacity <= 0 || queueLength < queueCapacity;
} }
} }
@ -261,13 +392,21 @@ protected void onNewNodeAdded(
if (rmNode.getState() != NodeState.DECOMMISSIONING && if (rmNode.getState() != NodeState.DECOMMISSIONING &&
(estimatedQueueWaitTime != -1 || (estimatedQueueWaitTime != -1 ||
comparator == LoadComparator.QUEUE_LENGTH)) { comparator == LoadComparator.QUEUE_LENGTH ||
this.clusterNodes.put(rmNode.getNodeID(), comparator == LoadComparator.QUEUE_LENGTH_THEN_RESOURCES)) {
new ClusterNode(rmNode.getNodeID()) final ClusterNode.Properties properties =
ClusterNode.Properties.newInstance()
.setQueueWaitTime(estimatedQueueWaitTime) .setQueueWaitTime(estimatedQueueWaitTime)
.setQueueLength(waitQueueLength) .setQueueLength(waitQueueLength)
.setNodeLabels(rmNode.getNodeLabels()) .setNodeLabels(rmNode.getNodeLabels())
.setQueueCapacity(opportQueueCapacity)); .setCapability(rmNode.getTotalCapability())
.setAllocatedResource(rmNode.getAllocatedContainerResource())
.setQueueCapacity(opportQueueCapacity)
.updateTimestamp();
this.clusterNodes.put(rmNode.getNodeID(),
new ClusterNode(rmNode.getNodeID()).setProperties(properties));
LOG.info( LOG.info(
"Inserting ClusterNode [{}] with queue wait time [{}] and " "Inserting ClusterNode [{}] with queue wait time [{}] and "
+ "wait queue length [{}]", + "wait queue length [{}]",
@ -295,12 +434,19 @@ protected void onExistingNodeUpdated(
if (rmNode.getState() != NodeState.DECOMMISSIONING && if (rmNode.getState() != NodeState.DECOMMISSIONING &&
(estimatedQueueWaitTime != -1 || (estimatedQueueWaitTime != -1 ||
comparator == LoadComparator.QUEUE_LENGTH)) { comparator == LoadComparator.QUEUE_LENGTH ||
clusterNode comparator == LoadComparator.QUEUE_LENGTH_THEN_RESOURCES)) {
final ClusterNode.Properties properties =
ClusterNode.Properties.newInstance()
.setQueueWaitTime(estimatedQueueWaitTime) .setQueueWaitTime(estimatedQueueWaitTime)
.setQueueLength(waitQueueLength) .setQueueLength(waitQueueLength)
.setNodeLabels(rmNode.getNodeLabels()) .setNodeLabels(rmNode.getNodeLabels())
.setCapability(rmNode.getTotalCapability())
.setAllocatedResource(rmNode.getAllocatedContainerResource())
.updateTimestamp(); .updateTimestamp();
clusterNode.setProperties(properties);
LOG.debug("Updating ClusterNode [{}] with queue wait time [{}] and" LOG.debug("Updating ClusterNode [{}] with queue wait time [{}] and"
+ " wait queue length [{}]", rmNode.getNodeID(), + " wait queue length [{}]", rmNode.getNodeID(),
estimatedQueueWaitTime, waitQueueLength); estimatedQueueWaitTime, waitQueueLength);
@ -345,27 +491,47 @@ public List<NodeId> selectLeastLoadedNodes(int k) {
} }
} }
public RMNode selectLocalNode(String hostName, Set<String> blacklist) { /**
* Selects the node as specified by hostName for resource allocation,
* unless the node has been blacklisted.
* @param hostName the hostname of the node for local resource allocation
* @param blacklist the blacklisted nodes
* @param request the requested resource
* @return the selected node, null if the node is full or is blacklisted
*/
public RMNode selectLocalNode(
String hostName, Set<String> blacklist, Resource request) {
if (blacklist.contains(hostName)) { if (blacklist.contains(hostName)) {
return null; return null;
} }
RMNode node = nodeByHostName.get(hostName); RMNode node = nodeByHostName.get(hostName);
if (node != null) { if (node != null) {
ClusterNode clusterNode = clusterNodes.get(node.getNodeID()); ClusterNode clusterNode = clusterNodes.get(node.getNodeID());
if (comparator.compareAndIncrement(clusterNode, 1)) { if (clusterNode != null && comparator
.compareAndIncrement(clusterNode, 1, request)) {
return node; return node;
} }
} }
return null; return null;
} }
public RMNode selectRackLocalNode(String rackName, Set<String> blacklist) { /**
* Selects a node from the rack as specified by rackName
* for resource allocation, excluding blacklisted nodes
* @param rackName the rack name for rack-local resource allocation
* @param blacklist the blacklisted nodes
* @param request the requested resource
* @return the selected node, null if no suitable nodes
*/
public RMNode selectRackLocalNode(
String rackName, Set<String> blacklist, Resource request) {
Set<NodeId> nodesOnRack = nodeIdsByRack.get(rackName); Set<NodeId> nodesOnRack = nodeIdsByRack.get(rackName);
if (nodesOnRack != null) { if (nodesOnRack != null) {
for (NodeId nodeId : nodesOnRack) { for (NodeId nodeId : nodesOnRack) {
if (!blacklist.contains(nodeId.getHost())) { if (!blacklist.contains(nodeId.getHost())) {
ClusterNode node = clusterNodes.get(nodeId); ClusterNode node = clusterNodes.get(nodeId);
if (node != null && comparator.compareAndIncrement(node, 1)) { if (node != null &&
comparator.compareAndIncrement(node, 1, request)) {
return nodeByHostName.get(nodeId.getHost()); return nodeByHostName.get(nodeId.getHost());
} }
} }
@ -374,7 +540,14 @@ public RMNode selectRackLocalNode(String rackName, Set<String> blacklist) {
return null; return null;
} }
public RMNode selectAnyNode(Set<String> blacklist) { /**
* Selects a node from all ClusterNodes for resource allocation,
* excluding blacklisted nodes.
* @param blacklist the blacklisted nodes
* @param request the requested resource
* @return the selected node, null if no suitable nodes
*/
public RMNode selectAnyNode(Set<String> blacklist, Resource request) {
List<NodeId> nodeIds = getCandidatesForSelectAnyNode(); List<NodeId> nodeIds = getCandidatesForSelectAnyNode();
int size = nodeIds.size(); int size = nodeIds.size();
if (size <= 0) { if (size <= 0) {
@ -388,7 +561,8 @@ public RMNode selectAnyNode(Set<String> blacklist) {
NodeId nodeId = nodeIds.get(index); NodeId nodeId = nodeIds.get(index);
if (nodeId != null && !blacklist.contains(nodeId.getHost())) { if (nodeId != null && !blacklist.contains(nodeId.getHost())) {
ClusterNode node = clusterNodes.get(nodeId); ClusterNode node = clusterNodes.get(nodeId);
if (node != null && comparator.compareAndIncrement(node, 1)) { if (node != null && comparator.compareAndIncrement(
node, 1, request)) {
return nodeByHostName.get(nodeId.getHost()); return nodeByHostName.get(nodeId.getHost());
} }
} }
@ -402,7 +576,10 @@ protected List<NodeId> getCandidatesForSelectAnyNode() {
protected void removeFromNodeIdsByRack(RMNode removedNode) { protected void removeFromNodeIdsByRack(RMNode removedNode) {
nodeIdsByRack.computeIfPresent(removedNode.getRackName(), nodeIdsByRack.computeIfPresent(removedNode.getRackName(),
(k, v) -> v).remove(removedNode.getNodeID()); (k, v) -> {
v.remove(removedNode.getNodeID());
return v;
});
} }
protected void addIntoNodeIdsByRack(RMNode addedNode) { protected void addIntoNodeIdsByRack(RMNode addedNode) {
@ -414,21 +591,21 @@ protected List<ClusterNode> sortNodes(boolean excludeFullNodes) {
ReentrantReadWriteLock.ReadLock readLock = clusterNodesLock.readLock(); ReentrantReadWriteLock.ReadLock readLock = clusterNodesLock.readLock();
readLock.lock(); readLock.lock();
try { try {
ArrayList<ClusterNode> aList = new ArrayList<>(this.clusterNodes.values()); final ClusterNode[] nodes = new ClusterNode[clusterNodes.size()];
List<ClusterNode> retList = new ArrayList<>(); int nodesIdx = 0;
Object[] nodes = aList.toArray(); final Resource clusterResource = Resource.newInstance(Resources.none());
// Collections.sort would do something similar by calling Arrays.sort for (final ClusterNode node : this.clusterNodes.values()) {
// internally but would finally iterate through the input list (aList) Resources.addTo(clusterResource, node.getCapability());
// to reset the value of each element. Since we don't really care about nodes[nodesIdx] = node;
// 'aList', we can use the iteration to create the list of nodeIds which nodesIdx++;
// is what we ultimately care about. }
Arrays.sort(nodes, (Comparator)comparator);
for (int j=0; j < nodes.length; j++) { comparator.setClusterResource(clusterResource);
ClusterNode cNode = (ClusterNode)nodes[j];
// Only add node to the result list when either condition is met: final List<ClusterNode> retList = new ArrayList<>();
// 1. we don't exclude full nodes Arrays.sort(nodes, comparator);
// 2. we do exclude full nodes, but the current node is not full for (final ClusterNode cNode : nodes) {
if (!excludeFullNodes || !cNode.isQueueFull()) { if (!excludeFullNodes || comparator.isNodeAvailable(cNode)) {
retList.add(cNode); retList.add(cNode);
} }
} }

View File

@ -18,17 +18,29 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed; package org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed;
import org.apache.hadoop.net.NodeBase;
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap;
import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit;
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.Assert; import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
/** /**
@ -36,8 +48,18 @@
*/ */
public class TestNodeQueueLoadMonitor { public class TestNodeQueueLoadMonitor {
// Extra resource type to test that all resource dimensions are considered
private static final String NETWORK_RESOURCE = "network";
private final static int DEFAULT_MAX_QUEUE_LENGTH = 200; private final static int DEFAULT_MAX_QUEUE_LENGTH = 200;
// Note: The following variables are private static resources
// re-initialized on each test because resource dimensions considered
// are initialized in a static method.
// Declaring them as static final will "lock-in" resource dimensions and
// disallow specification of a new resource dimension ("network") in tests.
private static Resource defaultResourceRequested;
private static Resource defaultCapacity;
static class FakeNodeId extends NodeId { static class FakeNodeId extends NodeId {
final String host; final String host;
final int port; final int port;
@ -70,6 +92,44 @@ public String toString() {
} }
} }
private static Resource newResourceInstance(long memory, int vCores) {
return newResourceInstance(memory, vCores, 0L);
}
private static Resource newResourceInstance(
final long memory, final int vCores, final long network) {
return Resource.newInstance(memory, vCores,
ImmutableMap.of(NETWORK_RESOURCE, network));
}
private static long getNetworkResourceValue(final Resource resource) {
return resource.getResourceValue(NETWORK_RESOURCE);
}
public static void addNewTypesToResources(String... resourceTypes) {
// Initialize resource map
Map<String, ResourceInformation> riMap = new HashMap<>();
// Initialize mandatory resources
riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB);
riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES);
for (String newResource : resourceTypes) {
riMap.put(newResource, ResourceInformation
.newInstance(newResource, "", 0, ResourceTypes.COUNTABLE, 0,
Integer.MAX_VALUE));
}
ResourceUtils.initializeResourcesFromResourceInformationMap(riMap);
}
@BeforeClass
public static void classSetUp() {
addNewTypesToResources(NETWORK_RESOURCE);
defaultResourceRequested = newResourceInstance(128, 1, 1);
defaultCapacity = newResourceInstance(1024, 8, 1000);
}
@Test @Test
public void testWaitTimeSort() { public void testWaitTimeSort() {
NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor( NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor(
@ -79,7 +139,6 @@ public void testWaitTimeSort() {
selector.updateNode(createRMNode("h3", 3, 10, 10)); selector.updateNode(createRMNode("h3", 3, 10, 10));
selector.computeTask.run(); selector.computeTask.run();
List<NodeId> nodeIds = selector.selectNodes(); List<NodeId> nodeIds = selector.selectNodes();
System.out.println("1-> " + nodeIds);
Assert.assertEquals("h2:2", nodeIds.get(0).toString()); Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h3:3", nodeIds.get(1).toString()); Assert.assertEquals("h3:3", nodeIds.get(1).toString());
Assert.assertEquals("h1:1", nodeIds.get(2).toString()); Assert.assertEquals("h1:1", nodeIds.get(2).toString());
@ -88,7 +147,6 @@ public void testWaitTimeSort() {
selector.updateNode(createRMNode("h3", 3, 2, 10)); selector.updateNode(createRMNode("h3", 3, 2, 10));
selector.computeTask.run(); selector.computeTask.run();
nodeIds = selector.selectNodes(); nodeIds = selector.selectNodes();
System.out.println("2-> "+ nodeIds);
Assert.assertEquals("h3:3", nodeIds.get(0).toString()); Assert.assertEquals("h3:3", nodeIds.get(0).toString());
Assert.assertEquals("h2:2", nodeIds.get(1).toString()); Assert.assertEquals("h2:2", nodeIds.get(1).toString());
Assert.assertEquals("h1:1", nodeIds.get(2).toString()); Assert.assertEquals("h1:1", nodeIds.get(2).toString());
@ -97,7 +155,6 @@ public void testWaitTimeSort() {
selector.updateNode(createRMNode("h4", 4, -1, 10)); selector.updateNode(createRMNode("h4", 4, -1, 10));
selector.computeTask.run(); selector.computeTask.run();
nodeIds = selector.selectNodes(); nodeIds = selector.selectNodes();
System.out.println("3-> "+ nodeIds);
// No change // No change
Assert.assertEquals("h3:3", nodeIds.get(0).toString()); Assert.assertEquals("h3:3", nodeIds.get(0).toString());
Assert.assertEquals("h2:2", nodeIds.get(1).toString()); Assert.assertEquals("h2:2", nodeIds.get(1).toString());
@ -186,6 +243,208 @@ public void testQueueLengthSort() {
Assert.assertEquals("h4:4", nodeIds.get(2).toString()); Assert.assertEquals("h4:4", nodeIds.get(2).toString());
} }
@Test
public void testQueueLengthThenResourcesSort() {
NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor(
NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH_THEN_RESOURCES);
// Node and queue sizes were selected such that we can determine the
// order of these nodes in the selectNodes call deterministically
// h2 -> h1 -> h3 -> h4
selector.updateNode(createRMNode(
"h1", 1, -1, 0,
Resources.multiply(defaultResourceRequested, 3), defaultCapacity));
selector.updateNode(createRMNode(
"h2", 2, -1, 0,
Resources.multiply(defaultResourceRequested, 2), defaultCapacity));
selector.updateNode(createRMNode(
"h3", 3, -1, 5,
Resources.multiply(defaultResourceRequested, 3), defaultCapacity));
selector.updateNode(createRMNode(
"h4", 4, -1, 10,
Resources.multiply(defaultResourceRequested, 2), defaultCapacity));
selector.computeTask.run();
List<NodeId> nodeIds = selector.selectNodes();
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h1:1", nodeIds.get(1).toString());
Assert.assertEquals("h3:3", nodeIds.get(2).toString());
Assert.assertEquals("h4:4", nodeIds.get(3).toString());
// Now update node3
// node3 should now rank after node4 since it has the same queue length
// but less resources available
selector.updateNode(createRMNode(
"h3", 3, -1, 10,
Resources.multiply(defaultResourceRequested, 3), defaultCapacity));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h1:1", nodeIds.get(1).toString());
Assert.assertEquals("h4:4", nodeIds.get(2).toString());
Assert.assertEquals("h3:3", nodeIds.get(3).toString());
// Now update h3 and fill its queue -- it should no longer be available
selector.updateNode(createRMNode("h3", 3, -1,
DEFAULT_MAX_QUEUE_LENGTH));
selector.computeTask.run();
nodeIds = selector.selectNodes();
// h3 is queued up, so we should only have 3 nodes left
Assert.assertEquals(3, nodeIds.size());
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h1:1", nodeIds.get(1).toString());
Assert.assertEquals("h4:4", nodeIds.get(2).toString());
// Now update h2 to Decommissioning state
selector.updateNode(createRMNode("h2", 2, -1,
5, NodeState.DECOMMISSIONING));
selector.computeTask.run();
nodeIds = selector.selectNodes();
// h2 is decommissioned, and h3 is full, so we should only have 2 nodes
Assert.assertEquals(2, nodeIds.size());
Assert.assertEquals("h1:1", nodeIds.get(0).toString());
Assert.assertEquals("h4:4", nodeIds.get(1).toString());
// Now update h2 back to Running state
selector.updateNode(createRMNode(
"h2", 2, -1, 0,
Resources.multiply(defaultResourceRequested, 2), defaultCapacity));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals(3, nodeIds.size());
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h1:1", nodeIds.get(1).toString());
Assert.assertEquals("h4:4", nodeIds.get(2).toString());
// Now update h2 to have a zero queue capacity.
// Make sure that here it is still in the pool.
selector.updateNode(createRMNode(
"h2", 2, -1, 0, 0,
Resources.multiply(defaultResourceRequested, 2),
defaultCapacity));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals(3, nodeIds.size());
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h1:1", nodeIds.get(1).toString());
Assert.assertEquals("h4:4", nodeIds.get(2).toString());
// Now update h2 to have a positive queue length but a zero queue capacity.
// Make sure that here it is no longer in the pool.
// Need to first remove the node, because node capacity is not updated.
selector.removeNode(createRMNode(
"h2", 2, -1, 0, 0,
Resources.multiply(defaultResourceRequested, 2),
defaultCapacity));
selector.updateNode(createRMNode(
"h2", 2, -1, 1, 0,
Resources.multiply(defaultResourceRequested, 2),
defaultCapacity));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals(2, nodeIds.size());
Assert.assertEquals("h1:1", nodeIds.get(0).toString());
Assert.assertEquals("h4:4", nodeIds.get(1).toString());
}
/**
* Tests that when using QUEUE_LENGTH_THEN_RESOURCES decrements the amount
* of resources on the internal {@link ClusterNode} representation.
*/
@Test
public void testQueueLengthThenResourcesDecrementsAvailable() {
NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor(
NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH_THEN_RESOURCES);
RMNode node = createRMNode("h1", 1, -1, 0);
selector.addNode(null, node);
selector.updateNode(node);
selector.updateSortedNodes();
ClusterNode clusterNode = selector.getClusterNodes().get(node.getNodeID());
Assert.assertEquals(Resources.none(),
clusterNode.getAllocatedResource());
// Has enough resources
RMNode selectedNode = selector.selectAnyNode(
Collections.emptySet(), defaultResourceRequested);
Assert.assertNotNull(selectedNode);
Assert.assertEquals(node.getNodeID(), selectedNode.getNodeID());
clusterNode = selector.getClusterNodes().get(node.getNodeID());
Assert.assertEquals(defaultResourceRequested,
clusterNode.getAllocatedResource());
// Does not have enough resources, but can queue
selectedNode = selector.selectAnyNode(
Collections.emptySet(), defaultCapacity);
Assert.assertNotNull(selectedNode);
Assert.assertEquals(node.getNodeID(), selectedNode.getNodeID());
clusterNode = selector.getClusterNodes().get(node.getNodeID());
Assert.assertEquals(1, clusterNode.getQueueLength());
// Does not have enough resources and cannot queue
selectedNode = selector.selectAnyNode(
Collections.emptySet(),
Resources.add(defaultResourceRequested, defaultCapacity));
Assert.assertNull(selectedNode);
}
@Test
public void testQueueLengthThenResourcesCapabilityChange() {
NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor(
NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH_THEN_RESOURCES);
// Node sizes were selected such that we can determine the
// order of these nodes in the selectNodes call deterministically
// h1 -> h2 -> h3 -> h4
selector.updateNode(createRMNode(
"h1", 1, -1, 0,
Resources.multiply(defaultResourceRequested, 1), defaultCapacity));
selector.updateNode(createRMNode(
"h2", 2, -1, 0,
Resources.multiply(defaultResourceRequested, 2), defaultCapacity));
selector.updateNode(createRMNode(
"h3", 3, -1, 0,
Resources.multiply(defaultResourceRequested, 3), defaultCapacity));
selector.updateNode(createRMNode(
"h4", 4, -1, 0,
Resources.multiply(defaultResourceRequested, 4), defaultCapacity));
selector.computeTask.run();
List<NodeId> nodeIds = selector.selectNodes();
Assert.assertEquals("h1:1", nodeIds.get(0).toString());
Assert.assertEquals("h2:2", nodeIds.get(1).toString());
Assert.assertEquals("h3:3", nodeIds.get(2).toString());
Assert.assertEquals("h4:4", nodeIds.get(3).toString());
// Now update node1 to have only defaultResourceRequested available
// by changing its capability to 2x defaultResourceReqeusted
// node1 should now rank last
selector.updateNode(createRMNode(
"h1", 1, -1, 0,
Resources.multiply(defaultResourceRequested, 1),
Resources.multiply(defaultResourceRequested, 2)));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals("h2:2", nodeIds.get(0).toString());
Assert.assertEquals("h3:3", nodeIds.get(1).toString());
Assert.assertEquals("h4:4", nodeIds.get(2).toString());
Assert.assertEquals("h1:1", nodeIds.get(3).toString());
// Now update node2 to have no resources available
// by changing its capability to 1x defaultResourceReqeusted
// node2 should now rank last
selector.updateNode(createRMNode(
"h2", 2, -1, 0,
Resources.multiply(defaultResourceRequested, 1),
Resources.multiply(defaultResourceRequested, 1)));
selector.computeTask.run();
nodeIds = selector.selectNodes();
Assert.assertEquals("h3:3", nodeIds.get(0).toString());
Assert.assertEquals("h4:4", nodeIds.get(1).toString());
Assert.assertEquals("h1:1", nodeIds.get(2).toString());
Assert.assertEquals("h2:2", nodeIds.get(3).toString());
}
@Test @Test
public void testContainerQueuingLimit() { public void testContainerQueuingLimit() {
NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor( NodeQueueLoadMonitor selector = new NodeQueueLoadMonitor(
@ -254,18 +513,22 @@ public void testSelectLocalNode() {
// basic test for selecting node which has queue length less // basic test for selecting node which has queue length less
// than queue capacity. // than queue capacity.
Set<String> blacklist = new HashSet<>(); Set<String> blacklist = new HashSet<>();
RMNode node = selector.selectLocalNode("h1", blacklist); RMNode node = selector.selectLocalNode(
"h1", blacklist, defaultResourceRequested);
Assert.assertEquals("h1", node.getHostName()); Assert.assertEquals("h1", node.getHostName());
// if node has been added to blacklist // if node has been added to blacklist
blacklist.add("h1"); blacklist.add("h1");
node = selector.selectLocalNode("h1", blacklist); node = selector.selectLocalNode(
"h1", blacklist, defaultResourceRequested);
Assert.assertNull(node); Assert.assertNull(node);
node = selector.selectLocalNode("h2", blacklist); node = selector.selectLocalNode(
"h2", blacklist, defaultResourceRequested);
Assert.assertNull(node); Assert.assertNull(node);
node = selector.selectLocalNode("h3", blacklist); node = selector.selectLocalNode(
"h3", blacklist, defaultResourceRequested);
Assert.assertEquals("h3", node.getHostName()); Assert.assertEquals("h3", node.getHostName());
} }
@ -293,19 +556,23 @@ public void testSelectRackLocalNode() {
// basic test for selecting node which has queue length less // basic test for selecting node which has queue length less
// than queue capacity. // than queue capacity.
Set<String> blacklist = new HashSet<>(); Set<String> blacklist = new HashSet<>();
RMNode node = selector.selectRackLocalNode("rack1", blacklist); RMNode node = selector.selectRackLocalNode(
"rack1", blacklist, defaultResourceRequested);
Assert.assertEquals("h1", node.getHostName()); Assert.assertEquals("h1", node.getHostName());
// if node has been added to blacklist // if node has been added to blacklist
blacklist.add("h1"); blacklist.add("h1");
node = selector.selectRackLocalNode("rack1", blacklist); node = selector.selectRackLocalNode(
"rack1", blacklist, defaultResourceRequested);
Assert.assertNull(node); Assert.assertNull(node);
node = selector.selectRackLocalNode("rack2", blacklist); node = selector.selectRackLocalNode(
"rack2", blacklist, defaultResourceRequested);
Assert.assertEquals("h3", node.getHostName()); Assert.assertEquals("h3", node.getHostName());
blacklist.add("h3"); blacklist.add("h3");
node = selector.selectRackLocalNode("rack2", blacklist); node = selector.selectRackLocalNode(
"rack2", blacklist, defaultResourceRequested);
Assert.assertNull(node); Assert.assertNull(node);
} }
@ -337,20 +604,217 @@ public void testSelectAnyNode() {
// basic test for selecting node which has queue length // basic test for selecting node which has queue length
// less than queue capacity. // less than queue capacity.
Set<String> blacklist = new HashSet<>(); Set<String> blacklist = new HashSet<>();
RMNode node = selector.selectAnyNode(blacklist); RMNode node = selector.selectAnyNode(blacklist, defaultResourceRequested);
Assert.assertTrue(node.getHostName().equals("h1") || Assert.assertTrue(node.getHostName().equals("h1") ||
node.getHostName().equals("h3")); node.getHostName().equals("h3"));
// if node has been added to blacklist // if node has been added to blacklist
blacklist.add("h1"); blacklist.add("h1");
node = selector.selectAnyNode(blacklist); node = selector.selectAnyNode(blacklist, defaultResourceRequested);
Assert.assertEquals("h3", node.getHostName()); Assert.assertEquals("h3", node.getHostName());
blacklist.add("h3"); blacklist.add("h3");
node = selector.selectAnyNode(blacklist); node = selector.selectAnyNode(blacklist, defaultResourceRequested);
Assert.assertNull(node); Assert.assertNull(node);
} }
@Test
public void testQueueLengthThenResourcesComparator() {
NodeQueueLoadMonitor.LoadComparator comparator =
NodeQueueLoadMonitor.LoadComparator.QUEUE_LENGTH_THEN_RESOURCES;
NodeId n1 = new FakeNodeId("n1", 5000);
NodeId n2 = new FakeNodeId("n2", 5000);
// Case 1: larger available cores should be ranked first
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(6, 6))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn1, cn2) < 0);
}
// Case 2: Shorter queue should be ranked first before comparing resources
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(5);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(3, 3))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn1, cn2) < 0);
}
// Case 3: No capability vs with capability,
// with capability should come first
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(Resources.none())
.setCapability(newResourceInstance(1, 1, 1000))
.setQueueLength(5);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(Resources.none())
.setCapability(Resources.none())
.setQueueLength(5);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn1, cn2) < 0);
}
// Case 4: Compare same values
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertEquals(0, comparator.compare(cn1, cn2));
}
// Case 5: If ratio is the same, compare raw values
// by VCores first, then memory
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(6, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 6))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
// Both are 60% allocated, but CN1 has 5 avail VCores, CN2 only has 4
Assert.assertTrue(comparator.compare(cn1, cn2) < 0);
}
// Case 6: by VCores absolute value
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 6))
.setCapability(newResourceInstance(10, 12, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn2, cn1) < 0);
}
// Case 7: by memory absolute value
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 5))
.setCapability(newResourceInstance(10, 10, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(6, 5))
.setCapability(newResourceInstance(12, 10, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn2, cn1) < 0);
}
// Case 8: Memory should be more constraining in the overall cluster,
// so rank the node with less allocated memory first
{
ClusterNode.Properties cn1Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(5, 11))
.setCapability(newResourceInstance(10, 100, 1000))
.setQueueLength(10);
ClusterNode cn1 = new ClusterNode(n1);
cn1.setProperties(cn1Props);
ClusterNode.Properties cn2Props =
ClusterNode.Properties.newInstance()
.setAllocatedResource(newResourceInstance(6, 10))
.setCapability(newResourceInstance(10, 100, 1000))
.setQueueLength(10);
ClusterNode cn2 = new ClusterNode(n2);
cn2.setProperties(cn2Props);
comparator.setClusterResource(
Resources.add(cn1.getCapability(), cn2.getCapability()));
Assert.assertTrue(comparator.compare(cn1, cn2) < 0);
}
}
private RMNode createRMNode(String host, int port, private RMNode createRMNode(String host, int port,
int waitTime, int queueLength) { int waitTime, int queueLength) {
return createRMNode(host, port, waitTime, queueLength, return createRMNode(host, port, waitTime, queueLength,
@ -377,12 +841,40 @@ private RMNode createRMNode(String host, int port, String rack,
private RMNode createRMNode(String host, int port, String rack, private RMNode createRMNode(String host, int port, String rack,
int waitTime, int queueLength, int queueCapacity, NodeState state) { int waitTime, int queueLength, int queueCapacity, NodeState state) {
return createRMNode(host, port, rack, waitTime, queueLength, queueCapacity,
state, Resources.none(), defaultCapacity);
}
private RMNode createRMNode(
String host, int port, int waitTime, int queueLength,
Resource allocatedResource, Resource nodeResource) {
return createRMNode(host, port, waitTime, queueLength,
DEFAULT_MAX_QUEUE_LENGTH, allocatedResource, nodeResource);
}
private RMNode createRMNode(
String host, int port, int waitTime, int queueLength, int queueCapacity,
Resource allocatedResource, Resource nodeResource) {
return createRMNode(host, port, "default", waitTime, queueLength,
queueCapacity, NodeState.RUNNING, allocatedResource, nodeResource);
}
@SuppressWarnings("parameternumber")
private RMNode createRMNode(String host, int port, String rack,
int waitTime, int queueLength, int queueCapacity, NodeState state,
Resource allocatedResource, Resource nodeResource) {
RMNode node1 = Mockito.mock(RMNode.class); RMNode node1 = Mockito.mock(RMNode.class);
NodeId nID1 = new FakeNodeId(host, port); NodeId nID1 = new FakeNodeId(host, port);
Mockito.when(node1.getHostName()).thenReturn(host); Mockito.when(node1.getHostName()).thenReturn(host);
Mockito.when(node1.getRackName()).thenReturn(rack); Mockito.when(node1.getRackName()).thenReturn(rack);
Mockito.when(node1.getNode()).thenReturn(new NodeBase("/" + host));
Mockito.when(node1.getNodeID()).thenReturn(nID1); Mockito.when(node1.getNodeID()).thenReturn(nID1);
Mockito.when(node1.getState()).thenReturn(state); Mockito.when(node1.getState()).thenReturn(state);
Mockito.when(node1.getTotalCapability()).thenReturn(nodeResource);
Mockito.when(node1.getNodeUtilization()).thenReturn(
ResourceUtilization.newInstance(0, 0, 0));
Mockito.when(node1.getAllocatedContainerResource()).thenReturn(
allocatedResource);
OpportunisticContainersStatus status1 = OpportunisticContainersStatus status1 =
Mockito.mock(OpportunisticContainersStatus.class); Mockito.mock(OpportunisticContainersStatus.class);
Mockito.when(status1.getEstimatedQueueWaitTime()) Mockito.when(status1.getEstimatedQueueWaitTime())