YARN-10590. Consider legacy auto queue creation absolute resource template to avoid rounding errors. Contributed by Andras Gyori
This commit is contained in:
parent
0463498adc
commit
365375412f
@ -19,6 +19,7 @@
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
|
||||
|
||||
@ -30,6 +31,8 @@ import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.CapacityConfigType.ABSOLUTE_RESOURCE;
|
||||
|
||||
/**
|
||||
* Leaf queues which are auto created by an underlying implementation of
|
||||
* AbstractManagedParentQueue. Eg: PlanQueue for reservations or
|
||||
@ -81,14 +84,14 @@ public class AutoCreatedLeafQueue extends AbstractAutoCreatedLeafQueue {
|
||||
QueueCapacities capacities = leafQueueTemplate.getQueueCapacities();
|
||||
|
||||
//reset capacities for the leaf queue
|
||||
mergeCapacities(capacities);
|
||||
mergeCapacities(capacities, leafQueueTemplate.getResourceQuotas());
|
||||
|
||||
} finally {
|
||||
writeLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public void mergeCapacities(QueueCapacities capacities) {
|
||||
public void mergeCapacities(QueueCapacities capacities, QueueResourceQuotas resourceQuotas) {
|
||||
for ( String nodeLabel : capacities.getExistingNodeLabels()) {
|
||||
queueCapacities.setCapacity(nodeLabel,
|
||||
capacities.getCapacity(nodeLabel));
|
||||
@ -101,9 +104,19 @@ public class AutoCreatedLeafQueue extends AbstractAutoCreatedLeafQueue {
|
||||
|
||||
Resource resourceByLabel = labelManager.getResourceByLabel(nodeLabel,
|
||||
queueContext.getClusterResource());
|
||||
getQueueResourceQuotas().setEffectiveMinResource(nodeLabel,
|
||||
Resources.multiply(resourceByLabel,
|
||||
queueCapacities.getAbsoluteCapacity(nodeLabel)));
|
||||
// Update effective resource from template due to rounding errors.
|
||||
// However, we need to consider deactivation as well, in which case we fall back to
|
||||
// Percentage calculation (as absolute capacity will be 0, resource will be zero as well).
|
||||
if (getCapacityConfigType().equals(ABSOLUTE_RESOURCE)
|
||||
&& queueCapacities.getAbsoluteCapacity(nodeLabel) > 0) {
|
||||
getQueueResourceQuotas().setEffectiveMinResource(nodeLabel,
|
||||
resourceQuotas.getConfiguredMinResource(nodeLabel));
|
||||
} else {
|
||||
getQueueResourceQuotas().setEffectiveMinResource(nodeLabel,
|
||||
Resources.multiply(resourceByLabel,
|
||||
queueCapacities.getAbsoluteCapacity(nodeLabel)));
|
||||
}
|
||||
|
||||
getQueueResourceQuotas().setEffectiveMaxResource(nodeLabel,
|
||||
Resources.multiply(resourceByLabel, queueCapacities
|
||||
.getAbsoluteMaximumCapacity(nodeLabel)));
|
||||
|
@ -17,6 +17,8 @@
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas;
|
||||
|
||||
/**
|
||||
* Auto Created Leaf queue configurations, capacity
|
||||
*/
|
||||
@ -31,15 +33,19 @@ public class AutoCreatedLeafQueueConfig {
|
||||
|
||||
private CapacitySchedulerConfiguration leafQueueConfigs;
|
||||
|
||||
private final QueueResourceQuotas resourceQuotas;
|
||||
|
||||
/**
 * Creates the leaf queue template configuration by copying the queue
 * capacities, the leaf queue configuration and the resource quotas held by
 * the given builder.
 *
 * @param builder builder whose collected values are copied into this object
 */
public AutoCreatedLeafQueueConfig(Builder builder) {
  // The three assignments are independent of each other.
  this.resourceQuotas = builder.queueResourceQuotas;
  this.leafQueueConfigs = builder.leafQueueConfigs;
  this.queueCapacities = builder.queueCapacities;
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private QueueCapacities queueCapacities;
|
||||
private CapacitySchedulerConfiguration leafQueueConfigs;
|
||||
private QueueResourceQuotas queueResourceQuotas;
|
||||
|
||||
public Builder capacities(QueueCapacities capacities) {
|
||||
this.queueCapacities = capacities;
|
||||
@ -54,6 +60,11 @@ public class AutoCreatedLeafQueueConfig {
|
||||
public AutoCreatedLeafQueueConfig build() {
|
||||
return new AutoCreatedLeafQueueConfig(this);
|
||||
}
|
||||
|
||||
public Builder resourceQuotas(QueueResourceQuotas queueResourceQuotas) {
|
||||
this.queueResourceQuotas = queueResourceQuotas;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public QueueCapacities getQueueCapacities() {
|
||||
@ -64,6 +75,10 @@ public class AutoCreatedLeafQueueConfig {
|
||||
return leafQueueConfigs;
|
||||
}
|
||||
|
||||
public QueueResourceQuotas getResourceQuotas() {
|
||||
return resourceQuotas;
|
||||
}
|
||||
|
||||
@Override public String toString() {
|
||||
return "AutoCreatedLeafQueueConfig{" + "queueCapacities=" + queueCapacities
|
||||
+ ", leafQueueConfigs=" + leafQueueConfigs + '}';
|
||||
|
@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
|
||||
.SchedulerDynamicEditException;
|
||||
@ -161,25 +162,14 @@ public class ManagedParentQueue extends AbstractManagedParentQueue {
|
||||
CapacitySchedulerConfiguration autoCreatedTemplateConfig =
|
||||
super.initializeLeafQueueConfigs(leafQueueTemplateConfPrefix);
|
||||
builder.configuration(autoCreatedTemplateConfig);
|
||||
QueueResourceQuotas queueResourceQuotas = new QueueResourceQuotas();
|
||||
setAbsoluteResourceTemplates(configuration, queueResourceQuotas);
|
||||
|
||||
QueuePath templateQueuePath = configuration
|
||||
.getAutoCreatedQueueObjectTemplateConfPrefix(getQueuePath());
|
||||
|
||||
Set<String> templateConfiguredNodeLabels = queueContext
|
||||
.getQueueManager().getConfiguredNodeLabelsForAllQueues()
|
||||
.getLabelsByQueue(templateQueuePath.getFullPath());
|
||||
for (String nodeLabel : templateConfiguredNodeLabels) {
|
||||
Resource templateMinResource = autoCreatedTemplateConfig.getMinimumResourceRequirement(
|
||||
nodeLabel, configuration
|
||||
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()),
|
||||
resourceTypes);
|
||||
|
||||
if (this.capacityConfigType.equals(CapacityConfigType.PERCENTAGE)
|
||||
&& !templateMinResource.equals(Resources.none())) {
|
||||
throw new IOException("Managed Parent Queue " + this.getQueuePath()
|
||||
+ " config type is different from leaf queue template config type");
|
||||
}
|
||||
}
|
||||
|
||||
//Load template capacities
|
||||
QueueCapacities queueCapacities = new QueueCapacities(false);
|
||||
CSQueueUtils.loadCapacitiesByLabelsFromConf(templateQueuePath,
|
||||
@ -187,7 +177,6 @@ public class ManagedParentQueue extends AbstractManagedParentQueue {
|
||||
configuration,
|
||||
templateConfiguredNodeLabels);
|
||||
|
||||
|
||||
/**
|
||||
* Populate leaf queue template (of Parent resources configured in
|
||||
* ABSOLUTE_RESOURCE) capacities with actual values for which configured has
|
||||
@ -198,9 +187,31 @@ public class ManagedParentQueue extends AbstractManagedParentQueue {
|
||||
updateQueueCapacities(queueCapacities);
|
||||
}
|
||||
builder.capacities(queueCapacities);
|
||||
builder.resourceQuotas(queueResourceQuotas);
|
||||
return builder;
|
||||
}
|
||||
|
||||
private void setAbsoluteResourceTemplates(CapacitySchedulerConfiguration configuration,
|
||||
QueueResourceQuotas queueResourceQuotas) throws IOException {
|
||||
QueuePath templateQueuePath = configuration
|
||||
.getAutoCreatedQueueObjectTemplateConfPrefix(getQueuePath());
|
||||
Set<String> templateConfiguredNodeLabels = queueContext
|
||||
.getQueueManager().getConfiguredNodeLabelsForAllQueues()
|
||||
.getLabelsByQueue(templateQueuePath.getFullPath());
|
||||
|
||||
for (String nodeLabel : templateConfiguredNodeLabels) {
|
||||
Resource templateMinResource = configuration.getMinimumResourceRequirement(
|
||||
nodeLabel, templateQueuePath.getFullPath(), resourceTypes);
|
||||
queueResourceQuotas.setConfiguredMinResource(nodeLabel, templateMinResource);
|
||||
|
||||
if (this.capacityConfigType.equals(CapacityConfigType.PERCENTAGE)
|
||||
&& !templateMinResource.equals(Resources.none())) {
|
||||
throw new IOException("Managed Parent Queue " + this.getQueuePath()
|
||||
+ " config type is different from leaf queue template config type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void updateQueueCapacities(QueueCapacities queueCapacities) {
|
||||
CapacitySchedulerConfiguration configuration =
|
||||
queueContext.getConfiguration();
|
||||
|
@ -592,19 +592,18 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
|
||||
|
||||
for (String nodeLabel : updatedQueueTemplate.getQueueCapacities()
|
||||
.getExistingNodeLabels()) {
|
||||
if (updatedQueueTemplate.getQueueCapacities().
|
||||
getCapacity(nodeLabel) > 0) {
|
||||
if (updatedQueueTemplate.getQueueCapacities().getCapacity(nodeLabel) > 0) {
|
||||
if (isActive(leafQueue, nodeLabel)) {
|
||||
LOG.debug("Queue is already active. Skipping activation : {}",
|
||||
leafQueue.getQueuePath());
|
||||
} else{
|
||||
activate(leafQueue, nodeLabel);
|
||||
}
|
||||
} else{
|
||||
} else {
|
||||
if (!isActive(leafQueue, nodeLabel)) {
|
||||
LOG.debug("Queue is already de-activated. Skipping "
|
||||
+ "de-activation : {}", leafQueue.getQueuePath());
|
||||
} else{
|
||||
} else {
|
||||
/**
|
||||
* While deactivating queues of type ABSOLUTE_RESOURCE, configured
|
||||
* min resource has to be set based on updated capacity (which is
|
||||
@ -613,7 +612,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
|
||||
* leads to incorrect results.
|
||||
*/
|
||||
leafQueue
|
||||
.mergeCapacities(updatedQueueTemplate.getQueueCapacities());
|
||||
.mergeCapacities(updatedQueueTemplate.getQueueCapacities(), leafQueueTemplate.getResourceQuotas());
|
||||
leafQueue.getQueueResourceQuotas()
|
||||
.setConfiguredMinResource(Resources.multiply(
|
||||
managedParentQueue.getQueueContext().getClusterResource(),
|
||||
@ -787,6 +786,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
|
||||
AutoCreatedLeafQueueConfig.Builder templateBuilder =
|
||||
new AutoCreatedLeafQueueConfig.Builder();
|
||||
templateBuilder.capacities(capacities);
|
||||
templateBuilder.resourceQuotas(managedParentQueue.getLeafQueueTemplate().getResourceQuotas());
|
||||
return new AutoCreatedLeafQueueConfig(templateBuilder);
|
||||
}
|
||||
}
|
||||
|
@ -148,8 +148,6 @@ public class TestAbsoluteResourceWithAutoQueue
|
||||
return csConf;
|
||||
}
|
||||
|
||||
// TODO: Wangda: I think this test case is not correct, Sunil could help look
|
||||
// into details.
|
||||
@Test(timeout = 20000)
|
||||
public void testAutoCreateLeafQueueCreation() throws Exception {
|
||||
|
||||
@ -182,10 +180,8 @@ public class TestAbsoluteResourceWithAutoQueue
|
||||
ManagedParentQueue parentQueue = (ManagedParentQueue) cs.getQueue(QUEUED);
|
||||
assertEquals(parentQueue, autoCreatedLeafQueue.getParent());
|
||||
|
||||
validateCapacities((AutoCreatedLeafQueue) autoCreatedLeafQueue, 0.4f,
|
||||
0.04f, 1f, 0.6f);
|
||||
validateCapacitiesByLabel((ManagedParentQueue) parentQueue,
|
||||
(AutoCreatedLeafQueue) autoCreatedLeafQueue, NO_LABEL);
|
||||
validateCapacities(autoCreatedLeafQueue, 0.4f, 0.04f, 1f, 0.6f);
|
||||
validateCapacitiesByLabel(parentQueue, autoCreatedLeafQueue, NO_LABEL);
|
||||
|
||||
Map<String, Float> expectedChildQueueAbsCapacity =
|
||||
new HashMap<String, Float>() {
|
||||
|
@ -89,6 +89,7 @@ import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager
|
||||
.NO_LABEL;
|
||||
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.CapacityConfigType.ABSOLUTE_RESOURCE;
|
||||
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler
|
||||
.capacity.CSQueueUtils.EPSILON;
|
||||
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler
|
||||
@ -166,6 +167,7 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
|
||||
|
||||
public static final float NODE_LABEL_GPU_TEMPLATE_CAPACITY = 30.0f;
|
||||
public static final float NODEL_LABEL_SSD_TEMPLATE_CAPACITY = 40.0f;
|
||||
public static final ImmutableSet<String> RESOURCE_TYPES = ImmutableSet.of("memory", "vcores");
|
||||
|
||||
protected MockRM mockRM = null;
|
||||
protected MockNM nm1 = null;
|
||||
@ -777,17 +779,21 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
|
||||
* parentQueue.getQueueCapacities().getAbsoluteCapacity(label));
|
||||
assertEquals(effMinCapacity, Resources.multiply(resourceByLabel,
|
||||
leafQueue.getQueueCapacities().getAbsoluteCapacity(label)));
|
||||
// TODO: Wangda, I think this is a wrong test, it doesn't consider rounding
|
||||
// loss of multiplication, the right value should be <10240, 2>, but the
|
||||
// test expects <10240, 1>
|
||||
// fixme, address this in the future patch (auto queue creation).
|
||||
// if (expectedQueueEntitlements.get(label).getCapacity() > EPSILON) {
|
||||
// assertEquals(Resource.newInstance(10 * GB, 2),
|
||||
// leafQueue.getEffectiveCapacity(label));
|
||||
// } else {
|
||||
// assertEquals(Resource.newInstance(0, 0),
|
||||
// leafQueue.getEffectiveCapacity(label));
|
||||
// }
|
||||
|
||||
if (expectedQueueEntitlements.get(label).getCapacity() > EPSILON) {
|
||||
if (leafQueue.getCapacityConfigType().equals(ABSOLUTE_RESOURCE)) {
|
||||
String templatePrefix = cs.getConfiguration().getAutoCreatedQueueTemplateConfPrefix(
|
||||
parentQueue.getQueuePath());
|
||||
Resource resourceTemplate = parentQueue.getLeafQueueTemplate().getLeafQueueConfigs()
|
||||
.getMinimumResourceRequirement(label, templatePrefix, RESOURCE_TYPES);
|
||||
assertEquals(resourceTemplate, leafQueue.getEffectiveCapacity(label));
|
||||
} else {
|
||||
assertEquals(effMinCapacity, leafQueue.getEffectiveCapacity(label));
|
||||
}
|
||||
} else {
|
||||
assertEquals(Resource.newInstance(0, 0),
|
||||
leafQueue.getEffectiveCapacity(label));
|
||||
}
|
||||
|
||||
if (leafQueue.getQueueCapacities().getAbsoluteCapacity(label) > 0) {
|
||||
assertTrue(Resources.greaterThan(cs.getResourceCalculator(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user