YARN-7784. Fix Cluster metrics when placement processor is enabled. (asuresh)

This commit is contained in:
Arun Suresh 2018-01-25 19:09:21 -08:00
parent c23980c4f2
commit f8c5f5b237
3 changed files with 60 additions and 2 deletions

View File

@ -694,6 +694,12 @@ private void updateMetricsForAllocatedContainer(NodeType type,
metrics.runAppAttempt(applicationId, user);
}
updateMetrics(applicationId, type, node, containerAllocated, user, queue);
}
public static void updateMetrics(ApplicationId applicationId, NodeType type,
SchedulerNode node, Container containerAllocated, String user,
Queue queue) {
if (LOG.isDebugEnabled()) {
LOG.debug("allocate: applicationId=" + applicationId + " container="
+ containerAllocated.getId() + " host=" + containerAllocated
@ -702,10 +708,10 @@ private void updateMetricsForAllocatedContainer(NodeType type,
+ type);
}
if(node != null) {
metrics.allocateResources(node.getPartition(), user, 1,
queue.getMetrics().allocateResources(node.getPartition(), user, 1,
containerAllocated.getResource(), true);
}
metrics.incrNodeTypeAggregations(user, type);
queue.getMetrics().incrNodeTypeAggregations(user, type);
}
// Get AppPlacementAllocator by specified schedulerKey

View File

@ -56,6 +56,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AppSchedulingInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
@ -548,6 +549,12 @@ public void apply(Resource cluster, ResourceCommitRequest<FiCaSchedulerApp,
((RMContainerImpl) rmContainer).setAllocationTags(
containerRequest.getSchedulingRequest().getAllocationTags());
}
} else {
AppSchedulingInfo.updateMetrics(getApplicationId(),
allocation.getAllocationLocalityType(),
schedulerContainer.getSchedulerNode(),
schedulerContainer.getRmContainer().getContainer(), getUser(),
getQueue());
}
attemptResourceUsage.incUsed(schedulerContainer.getNodePartition(),

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
@ -148,6 +149,10 @@ public void testAntiAffinityPlacement() throws Exception {
.collect(Collectors.toSet());
// Ensure unique nodes (antiaffinity)
Assert.assertEquals(4, nodeIds.size());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 11264, 11, 5120, 5, 5);
}
@Test(timeout = 300000)
@ -197,6 +202,10 @@ public void testMutualAntiAffinityPlacement() throws Exception {
.collect(Collectors.toSet());
// Ensure unique nodes (antiaffinity)
Assert.assertEquals(5, nodeIds.size());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 14336, 14, 6144, 6, 6);
}
@Test(timeout = 300000)
@ -245,6 +254,10 @@ public void testCardinalityPlacement() throws Exception {
for (NodeId n : nodeIdContainerIdMap.keySet()) {
Assert.assertTrue(nodeIdContainerIdMap.get(n) < 5);
}
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 23552, 23, 9216, 9, 9);
}
@Test(timeout = 300000)
@ -288,6 +301,10 @@ public void testAffinityPlacement() throws Exception {
.collect(Collectors.toSet());
// Ensure all containers end up on the same node (affinity)
Assert.assertEquals(1, nodeIds.size());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 26624, 26, 6144, 6, 6);
}
@Test(timeout = 300000)
@ -340,6 +357,10 @@ public void testComplexPlacement() throws Exception {
for (NodeId n : nodeIdContainerIdMap.keySet()) {
Assert.assertTrue(nodeIdContainerIdMap.get(n) < 4);
}
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 9216, 9, 7168, 7, 7);
}
@Test(timeout = 300000)
@ -407,6 +428,10 @@ public void testSchedulerRejection() throws Exception {
Assert.assertEquals(4, rej.getRequest().getAllocationRequestId());
Assert.assertEquals(RejectionReason.COULD_NOT_SCHEDULE_ON_NODE,
rej.getReason());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 12288, 12, 4096, 4, 4);
}
@Test(timeout = 300000)
@ -490,6 +515,10 @@ public void testRePlacementAfterSchedulerRejection() throws Exception {
.map(x -> x.getNodeId()).collect(Collectors.toSet());
// Ensure unique nodes
Assert.assertEquals(4, nodeIds.size());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 19, 9216, 5, 5);
}
@Test(timeout = 300000)
@ -557,6 +586,10 @@ public void testPlacementRejection() throws Exception {
RejectedSchedulingRequest rej = rejectedReqs.get(0);
Assert.assertEquals(RejectionReason.COULD_NOT_PLACE_ON_NODE,
rej.getReason());
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// Verify Metrics
verifyMetrics(metrics, 11264, 11, 5120, 5, 5);
}
private static void waitForContainerAllocation(Collection<MockNM> nodes,
@ -594,4 +627,16 @@ protected static SchedulingRequest schedulingRequest(
ResourceSizing.newInstance(1, Resource.newInstance(mem, cores)))
.build();
}
private static void verifyMetrics(QueueMetrics metrics, long availableMB,
int availableVirtualCores, long allocatedMB,
int allocatedVirtualCores, int allocatedContainers) {
Assert.assertEquals(availableMB, metrics.getAvailableMB());
Assert.assertEquals(availableVirtualCores,
metrics.getAvailableVirtualCores());
Assert.assertEquals(allocatedMB, metrics.getAllocatedMB());
Assert.assertEquals(allocatedVirtualCores,
metrics.getAllocatedVirtualCores());
Assert.assertEquals(allocatedContainers, metrics.getAllocatedContainers());
}
}