YARN-1047. Expose # of pre-emptions as a queue counter (Contributed by Karthik Kambatla via Daniel Templeton)
This commit is contained in:
parent
819808a016
commit
846a0cd678
@ -71,6 +71,8 @@ public class QueueMetrics implements MetricsSource {
|
||||
@Metric("Aggregate # of allocated off-switch containers")
|
||||
MutableCounterLong aggregateOffSwitchContainersAllocated;
|
||||
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased;
|
||||
@Metric("Aggregate # of preempted containers") MutableCounterLong
|
||||
aggregateContainersPreempted;
|
||||
@Metric("Available memory in MB") MutableGaugeLong availableMB;
|
||||
@Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores;
|
||||
@Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB;
|
||||
@ -476,6 +478,13 @@ public void releaseResources(String user, Resource res) {
|
||||
}
|
||||
}
|
||||
|
||||
public void preemptContainer() {
|
||||
aggregateContainersPreempted.incr();
|
||||
if (parent != null) {
|
||||
parent.preemptContainer();
|
||||
}
|
||||
}
|
||||
|
||||
public void reserveResource(String user, Resource res) {
|
||||
reservedContainers.incr();
|
||||
reservedMB.incr(res.getMemorySize());
|
||||
@ -640,4 +649,8 @@ public long getAggregateOffSwitchContainersAllocated() {
|
||||
public long getAggegatedReleasedContainers() {
|
||||
return aggregateContainersReleased.value();
|
||||
}
|
||||
|
||||
public long getAggregatePreemptedContainers() {
|
||||
return aggregateContainersPreempted.value();
|
||||
}
|
||||
}
|
||||
|
@ -45,6 +45,7 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk;
|
||||
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
@ -161,6 +162,10 @@ void containerCompleted(RMContainer rmContainer,
|
||||
}
|
||||
|
||||
untrackContainerForPreemption(rmContainer);
|
||||
if (containerStatus.getDiagnostics().
|
||||
equals(SchedulerUtils.PREEMPTED_CONTAINER)) {
|
||||
queue.getMetrics().preemptContainer();
|
||||
}
|
||||
|
||||
Resource containerResource = rmContainer.getContainer().getResource();
|
||||
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER,
|
||||
|
@ -284,14 +284,20 @@ private void verifyPreemption(int numStarvedAppContainers)
|
||||
Thread.sleep(10);
|
||||
}
|
||||
|
||||
// Verify the right amount of containers are preempted from greedyApp
|
||||
assertEquals("Incorrect number of containers on the greedy app",
|
||||
// Post preemption, verify the greedyApp has the correct # of containers.
|
||||
assertEquals("Incorrect # of containers on the greedy app",
|
||||
2 * numStarvedAppContainers, greedyApp.getLiveContainers().size());
|
||||
|
||||
// Verify the queue metrics are set appropriately. The greedyApp started
|
||||
// with 8 1GB, 1vcore containers.
|
||||
assertEquals("Incorrect # of preempted containers in QueueMetrics",
|
||||
8 - 2 * numStarvedAppContainers,
|
||||
greedyApp.getQueue().getMetrics().getAggregatePreemptedContainers());
|
||||
|
||||
sendEnoughNodeUpdatesToAssignFully();
|
||||
|
||||
// Verify the preempted containers are assigned to starvingApp
|
||||
assertEquals("Starved app is not assigned the right number of containers",
|
||||
assertEquals("Starved app is not assigned the right # of containers",
|
||||
numStarvedAppContainers, starvingApp.getLiveContainers().size());
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user