From 846a0cd678fba743220f28cef844ac9011a3f934 Mon Sep 17 00:00:00 2001 From: Daniel Templeton Date: Thu, 9 Mar 2017 17:51:47 -0800 Subject: [PATCH] YARN-1047. Expose # of pre-emptions as a queue counter (Contributed by Karthik Kambatla via Daniel Templeton) --- .../resourcemanager/scheduler/QueueMetrics.java | 13 +++++++++++++ .../scheduler/fair/FSAppAttempt.java | 5 +++++ .../scheduler/fair/TestFairSchedulerPreemption.java | 12 +++++++++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 4e364f70f5..007d2b3b9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -71,6 +71,8 @@ public class QueueMetrics implements MetricsSource { @Metric("Aggregate # of allocated off-switch containers") MutableCounterLong aggregateOffSwitchContainersAllocated; @Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased; + @Metric("Aggregate # of preempted containers") MutableCounterLong + aggregateContainersPreempted; @Metric("Available memory in MB") MutableGaugeLong availableMB; @Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores; @Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB; @@ -476,6 +478,13 @@ public void releaseResources(String user, Resource res) { } } + public void preemptContainer() { + aggregateContainersPreempted.incr(); + if (parent != null) { + parent.preemptContainer(); + } + } + public void reserveResource(String user, Resource res) { reservedContainers.incr(); reservedMB.incr(res.getMemorySize()); @@ -640,4 +649,8 @@ public long getAggregateOffSwitchContainersAllocated() { public long getAggegatedReleasedContainers() { return aggregateContainersReleased.value(); } + + public long getAggregatePreemptedContainers() { + return aggregateContainersPreempted.value(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 6c61b451d6..3a9c94e656 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -161,6 +162,10 @@ void containerCompleted(RMContainer rmContainer, } untrackContainerForPreemption(rmContainer); + if (containerStatus.getDiagnostics(). + equals(SchedulerUtils.PREEMPTED_CONTAINER)) { + queue.getMetrics().preemptContainer(); + } Resource containerResource = rmContainer.getContainer().getResource(); RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java index 322ad5b3f5..3940a47ade 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java @@ -284,14 +284,20 @@ private void verifyPreemption(int numStarvedAppContainers) Thread.sleep(10); } - // Verify the right amount of containers are preempted from greedyApp - assertEquals("Incorrect number of containers on the greedy app", + // Post preemption, verify the greedyApp has the correct # of containers. + assertEquals("Incorrect # of containers on the greedy app", 2 * numStarvedAppContainers, greedyApp.getLiveContainers().size()); + // Verify the queue metrics are set appropriately. The greedyApp started + // with 8 1GB, 1vcore containers. + assertEquals("Incorrect # of preempted containers in QueueMetrics", + 8 - 2 * numStarvedAppContainers, + greedyApp.getQueue().getMetrics().getAggregatePreemptedContainers()); + sendEnoughNodeUpdatesToAssignFully(); // Verify the preempted containers are assigned to starvingApp - assertEquals("Starved app is not assigned the right number of containers", + assertEquals("Starved app is not assigned the right # of containers", numStarvedAppContainers, starvingApp.getLiveContainers().size()); }