YARN-2113. Add cross-user preemption within CapacityScheduler's leaf-queue. (Sunil G via wangda)

Change-Id: I9b19f69788068be05b3295247cdd7b972f8a573c
Wangda Tan 2017-05-22 14:26:13 -07:00
parent 9cab42cc79
commit c583ab02c7
20 changed files with 1686 additions and 156 deletions

DefaultResourceCalculator.java

@@ -121,4 +121,9 @@ public boolean fitsIn(Resource cluster,
       Resource smaller, Resource bigger) {
     return smaller.getMemorySize() <= bigger.getMemorySize();
   }
+
+  @Override
+  public boolean isAnyMajorResourceZero(Resource resource) {
+    return resource.getMemorySize() == 0f;
+  }
 }

DominantResourceCalculator.java

@@ -239,4 +239,9 @@ public boolean fitsIn(Resource cluster,
     return smaller.getMemorySize() <= bigger.getMemorySize()
         && smaller.getVirtualCores() <= bigger.getVirtualCores();
   }
+
+  @Override
+  public boolean isAnyMajorResourceZero(Resource resource) {
+    return resource.getMemorySize() == 0f || resource.getVirtualCores() == 0;
+  }
 }

ResourceCalculator.java

@@ -204,4 +204,13 @@ public abstract float divide(
    */
   public abstract boolean fitsIn(Resource cluster,
       Resource smaller, Resource bigger);
+
+  /**
+   * Check whether any major resource type (a resource type tracked by all
+   * NodeManagers) of the given resource has a zero value.
+   *
+   * @param resource resource
+   * @return true if any major resource type is zero.
+   */
+  public abstract boolean isAnyMajorResourceZero(Resource resource);
 }

Resources.java

@@ -352,4 +352,9 @@ public static Resource componentwiseMax(Resource lhs, Resource rhs) {
     return createResource(Math.max(lhs.getMemorySize(), rhs.getMemorySize()),
         Math.max(lhs.getVirtualCores(), rhs.getVirtualCores()));
   }
+
+  public static boolean isAnyMajorResourceZero(ResourceCalculator rc,
+      Resource resource) {
+    return rc.isAnyMajorResourceZero(resource);
+  }
 }
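The new Resources helper simply delegates to the active ResourceCalculator, so its answer depends on whether memory-only (DefaultResourceCalculator) or memory-plus-vcores (DominantResourceCalculator) accounting is in effect. A minimal sketch of the intended use; the calculator choice and resource values here are illustrative, not taken from the patch:

    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
    import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
    import org.apache.hadoop.yarn.util.resource.Resources;

    public class MajorResourceZeroExample {
      public static void main(String[] args) {
        ResourceCalculator rc = new DominantResourceCalculator();
        // <8192 MB, 0 vcores>: memory demand remains, but vcores is already
        // exhausted, so selecting more containers cannot satisfy this request.
        Resource toObtain = Resource.newInstance(8192, 0);
        if (Resources.isAnyMajorResourceZero(rc, toObtain)) {
          System.out.println("stop selecting preemption candidates");
        }
      }
    }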

CapacitySchedulerPreemptionContext.java

@@ -18,9 +18,11 @@
 package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
 
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
 import org.apache.hadoop.yarn.util.resource.ResourceCalculator;

@@ -63,4 +65,7 @@ TempQueuePerPartition getQueueByPartition(String queueName,
   float getMinimumThresholdForIntraQueuePreemption();
 
   float getMaxAllowableLimitForIntraQueuePreemption();
+
+  @Unstable
+  IntraQueuePreemptionOrderPolicy getIntraQueuePreemptionOrderPolicy();
 }

CapacitySchedulerPreemptionUtils.java

@@ -99,7 +99,7 @@ public static void deductPreemptableResourcesBasedSelectedCandidates(
         }
 
         deductPreemptableResourcePerApp(context, tq.totalPartitionResource,
-            tas, res, partition);
+            tas, res);
       }
     }
   }

@@ -108,10 +108,10 @@ public static void deductPreemptableResourcesBasedSelectedCandidates(
   private static void deductPreemptableResourcePerApp(
       CapacitySchedulerPreemptionContext context,
       Resource totalPartitionResource, Collection<TempAppPerPartition> tas,
-      Resource res, String partition) {
+      Resource res) {
     for (TempAppPerPartition ta : tas) {
       ta.deductActuallyToBePreempted(context.getResourceCalculator(),
-          totalPartitionResource, res, partition);
+          totalPartitionResource, res);
     }
   }

@@ -157,7 +157,8 @@ public static boolean tryPreemptContainerAndDeductResToObtain(
         && Resources.greaterThan(rc, clusterResource, toObtainByPartition,
             Resources.none())
         && Resources.fitsIn(rc, clusterResource,
-            rmContainer.getAllocatedResource(), totalPreemptionAllowed)) {
+            rmContainer.getAllocatedResource(), totalPreemptionAllowed)
+        && !Resources.isAnyMajorResourceZero(rc, toObtainByPartition)) {
       Resources.subtractFrom(toObtainByPartition,
           rmContainer.getAllocatedResource());
       Resources.subtractFrom(totalPreemptionAllowed,

FifoIntraQueuePreemptionPlugin.java

@@ -18,11 +18,13 @@
 package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.PriorityQueue;
 import java.util.Set;

@@ -33,7 +35,9 @@
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.IntraQueueCandidatesSelector.TAPriorityComparator;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
@@ -60,6 +64,26 @@ public FifoIntraQueuePreemptionPlugin(ResourceCalculator rc,
     this.rc = rc;
   }
 
+  @Override
+  public Collection<FiCaSchedulerApp> getPreemptableApps(String queueName,
+      String partition) {
+    TempQueuePerPartition tq = context.getQueueByPartition(queueName,
+        partition);
+
+    List<FiCaSchedulerApp> apps = new ArrayList<FiCaSchedulerApp>();
+    for (TempAppPerPartition tmpApp : tq.getApps()) {
+      // If a lower priority app was not selected to get preempted, leave such
+      // apps out of preemption candidate selection.
+      if (Resources.equals(tmpApp.getActuallyToBePreempted(),
+          Resources.none())) {
+        continue;
+      }
+
+      apps.add(tmpApp.app);
+    }
+    return apps;
+  }
+
   @Override
   public Map<String, Resource> getResourceDemandFromAppsPerQueue(
       String queueName, String partition) {
@@ -90,7 +114,7 @@ public Map<String, Resource> getResourceDemandFromAppsPerQueue(
 
   @Override
   public void computeAppsIdealAllocation(Resource clusterResource,
-      Resource partitionBasedResource, TempQueuePerPartition tq,
+      TempQueuePerPartition tq,
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
       Resource totalPreemptedResourceAllowed,
       Resource queueReassignableResource, float maxAllowablePreemptLimit) {

@@ -113,17 +137,15 @@ public void computeAppsIdealAllocation(Resource clusterResource,
     // 3. Create all tempApps for internal calculation and return a list from
     // high priority to low priority order.
-    TAPriorityComparator taComparator = new TAPriorityComparator();
-    PriorityQueue<TempAppPerPartition> orderedByPriority =
-        createTempAppForResCalculation(tq.partition, apps, taComparator);
+    PriorityQueue<TempAppPerPartition> orderedByPriority = createTempAppForResCalculation(
+        tq, apps, clusterResource, perUserAMUsed);
 
     // 4. Calculate idealAssigned per app by checking based on queue's
     // unallocated resource. Also return apps arranged from lower priority to
     // higher priority.
-    TreeSet<TempAppPerPartition> orderedApps =
-        calculateIdealAssignedResourcePerApp(clusterResource,
-            partitionBasedResource, tq, selectedCandidates,
-            queueReassignableResource, orderedByPriority, perUserAMUsed);
+    TreeSet<TempAppPerPartition> orderedApps = calculateIdealAssignedResourcePerApp(
+        clusterResource, tq, selectedCandidates, queueReassignableResource,
+        orderedByPriority);
 
     // 5. A configurable limit that could define an ideal allowable preemption
     // limit. Based on current queue's capacity, defines how much % could become

@@ -146,7 +168,7 @@ public void computeAppsIdealAllocation(Resource clusterResource,
     // 7. From lowest priority app onwards, calculate toBePreempted resource
     // based on demand.
     calculateToBePreemptedResourcePerApp(clusterResource, orderedApps,
-        preemptionLimit);
+        Resources.clone(preemptionLimit));
 
     // Save all apps (low to high) to temp queue for further reference
     tq.addAllApps(orderedApps);

@@ -154,7 +176,8 @@ public void computeAppsIdealAllocation(Resource clusterResource,
     // 8. There are chances that we may preempt for the demand from the same
     // priority level; such cases are to be validated out.
     validateOutSameAppPriorityFromDemand(clusterResource,
-        (TreeSet<TempAppPerPartition>) tq.getApps());
+        (TreeSet<TempAppPerPartition>) orderedApps, tq.getUsersPerPartition(),
+        context.getIntraQueuePreemptionOrderPolicy());
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("Queue Name:" + tq.queueName + ", partition:" + tq.partition);
@@ -177,17 +200,17 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource,
       Resource preemtableFromApp = Resources.subtract(tmpApp.getUsed(),
           tmpApp.idealAssigned);
-      Resources.subtractFrom(preemtableFromApp, tmpApp.selected);
-      Resources.subtractFrom(preemtableFromApp, tmpApp.getAMUsed());
+      Resources.subtractFromNonNegative(preemtableFromApp, tmpApp.selected);
+      Resources.subtractFromNonNegative(preemtableFromApp, tmpApp.getAMUsed());
 
       // Calculate toBePreempted from apps as follows:
       // app.preemptable = min(max(app.used - app.selected - app.ideal, 0),
       //     intra_q_preemptable)
       tmpApp.toBePreempted = Resources.min(rc, clusterResource, Resources
          .max(rc, clusterResource, preemtableFromApp, Resources.none()),
-          preemptionLimit);
+          Resources.clone(preemptionLimit));
 
-      preemptionLimit = Resources.subtract(preemptionLimit,
+      preemptionLimit = Resources.subtractFromNonNegative(preemptionLimit,
           tmpApp.toBePreempted);
     }
   }
@@ -222,31 +245,24 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource,
    *  }
    *
    * @param clusterResource Cluster Resource
-   * @param partitionBasedResource resource per partition
    * @param tq TempQueue
    * @param selectedCandidates Already Selected preemption candidates
    * @param queueReassignableResource Resource used in a queue
    * @param orderedByPriority List of running apps
-   * @param perUserAMUsed AM used resource
    * @return List of temp apps ordered from low to high priority
    */
   private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
-      Resource clusterResource, Resource partitionBasedResource,
-      TempQueuePerPartition tq,
+      Resource clusterResource, TempQueuePerPartition tq,
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
       Resource queueReassignableResource,
-      PriorityQueue<TempAppPerPartition> orderedByPriority,
-      Map<String, Resource> perUserAMUsed) {
+      PriorityQueue<TempAppPerPartition> orderedByPriority) {
 
     Comparator<TempAppPerPartition> reverseComp = Collections
         .reverseOrder(new TAPriorityComparator());
     TreeSet<TempAppPerPartition> orderedApps = new TreeSet<>(reverseComp);
-    Map<String, Resource> userIdealAssignedMapping = new HashMap<>();
 
     String partition = tq.partition;
-    Map<String, Resource> preCalculatedUserLimit =
-        new HashMap<String, Resource>();
+    Map<String, TempUserPerPartition> usersPerPartition =
+        tq.getUsersPerPartition();
 
     while (!orderedByPriority.isEmpty()) {
       // Remove app from the next highest remaining priority and process it to
@@ -256,44 +272,19 @@ private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
 
       // Once unallocated resource is 0, we can stop assigning ideal per app.
       if (Resources.lessThanOrEqual(rc, clusterResource,
-          queueReassignableResource, Resources.none())) {
+          queueReassignableResource, Resources.none())
+          || Resources.isAnyMajorResourceZero(rc, queueReassignableResource)) {
         continue;
       }
 
       String userName = tmpApp.app.getUser();
-      Resource userLimitResource = preCalculatedUserLimit.get(userName);
-
-      // Verify whether we already calculated headroom for this user.
-      if (userLimitResource == null) {
-        userLimitResource = Resources.clone(
-            tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource,
-                partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY));
-
-        Resource amUsed = perUserAMUsed.get(userName);
-        if (null == amUsed) {
-          amUsed = Resources.createResource(0, 0);
-        }
-
-        // Real AM used need not have to be considered for user-limit as well.
-        userLimitResource = Resources.subtract(userLimitResource, amUsed);
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Userlimit for user '" + userName + "' is :"
-              + userLimitResource + ", and amUsed is:" + amUsed);
-        }
-
-        preCalculatedUserLimit.put(userName, userLimitResource);
-      }
-
-      Resource idealAssignedForUser = userIdealAssignedMapping.get(userName);
-
-      if (idealAssignedForUser == null) {
-        idealAssignedForUser = Resources.createResource(0, 0);
-        userIdealAssignedMapping.put(userName, idealAssignedForUser);
-      }
+      TempUserPerPartition tmpUser = usersPerPartition.get(userName);
+      Resource userLimitResource = tmpUser.getUserLimit();
+      Resource idealAssignedForUser = tmpUser.idealAssigned;
 
       // Calculate total selected container resources from current app.
-      getAlreadySelectedPreemptionCandidatesResource(selectedCandidates,
-          tmpApp, partition);
+      getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, tmpApp,
+          tmpUser, partition);
 
       // For any app, used+pending will give its idealAssigned. However it will
       // be tightly linked to queue's unallocated quota. So lower priority apps
@@ -304,10 +295,11 @@ private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
       if (Resources.lessThan(rc, clusterResource, idealAssignedForUser,
           userLimitResource)) {
-        appIdealAssigned = Resources.min(rc, clusterResource, appIdealAssigned,
+        Resource idealAssigned = Resources.min(rc, clusterResource,
+            appIdealAssigned,
             Resources.subtract(userLimitResource, idealAssignedForUser));
         tmpApp.idealAssigned = Resources.clone(Resources.min(rc,
-            clusterResource, queueReassignableResource, appIdealAssigned));
+            clusterResource, queueReassignableResource, idealAssigned));
         Resources.addTo(idealAssignedForUser, tmpApp.idealAssigned);
       } else {
         continue;

@@ -322,7 +314,8 @@ private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
             Resources.subtract(tmpApp.idealAssigned, appUsedExcludedSelected));
       }
 
-      Resources.subtractFrom(queueReassignableResource, tmpApp.idealAssigned);
+      Resources.subtractFromNonNegative(queueReassignableResource,
+          tmpApp.idealAssigned);
     }
 
     return orderedApps;
@@ -334,7 +327,8 @@ private TreeSet<TempAppPerPartition> calculateIdealAssignedResourcePerApp(
    */
   private void getAlreadySelectedPreemptionCandidatesResource(
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
-      TempAppPerPartition tmpApp, String partition) {
+      TempAppPerPartition tmpApp, TempUserPerPartition tmpUser,
+      String partition) {
     tmpApp.selected = Resources.createResource(0, 0);
     Set<RMContainer> containers = selectedCandidates
         .get(tmpApp.app.getApplicationAttemptId());

@@ -346,16 +340,23 @@ private void getAlreadySelectedPreemptionCandidatesResource(
     for (RMContainer cont : containers) {
       if (partition.equals(cont.getNodeLabelExpression())) {
         Resources.addTo(tmpApp.selected, cont.getAllocatedResource());
+        Resources.addTo(tmpUser.selected, cont.getAllocatedResource());
       }
     }
   }
 
   private PriorityQueue<TempAppPerPartition> createTempAppForResCalculation(
-      String partition, Collection<FiCaSchedulerApp> apps,
-      TAPriorityComparator taComparator) {
+      TempQueuePerPartition tq, Collection<FiCaSchedulerApp> apps,
+      Resource clusterResource, Map<String, Resource> perUserAMUsed) {
+    TAPriorityComparator taComparator = new TAPriorityComparator();
     PriorityQueue<TempAppPerPartition> orderedByPriority = new PriorityQueue<>(
         100, taComparator);
 
+    String partition = tq.partition;
+    Map<String, TempUserPerPartition> usersPerPartition = tq
+        .getUsersPerPartition();
+
     // have an internal temp app structure to store intermediate data(priority)
     for (FiCaSchedulerApp app : apps) {
@@ -387,56 +388,156 @@ private PriorityQueue<TempAppPerPartition> createTempAppForResCalculation(
       tmpApp.idealAssigned = Resources.createResource(0, 0);
 
       orderedByPriority.add(tmpApp);
+
+      // Create a TempUserPerPartition structure to hold more information
+      // regarding each user's entities such as UserLimit etc. This could
+      // be kept in a user to TempUserPerPartition map for further reference.
+      String userName = app.getUser();
+      if (!usersPerPartition.containsKey(userName)) {
+        ResourceUsage userResourceUsage = tq.leafQueue.getUser(userName)
+            .getResourceUsage();
+
+        TempUserPerPartition tmpUser = new TempUserPerPartition(
+            tq.leafQueue.getUser(userName), tq.queueName,
+            Resources.clone(userResourceUsage.getUsed(partition)),
+            Resources.clone(perUserAMUsed.get(userName)),
+            Resources.clone(userResourceUsage.getReserved(partition)),
+            Resources.none());
+
+        Resource userLimitResource = Resources.clone(
+            tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource,
+                partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY));
+
+        // Real AM used need not be considered for user-limit as well.
+        userLimitResource = Resources.subtract(userLimitResource,
+            tmpUser.amUsed);
+        tmpUser.setUserLimit(userLimitResource);
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("TempUser:" + tmpUser);
+        }
+
+        tmpUser.idealAssigned = Resources.createResource(0, 0);
+        tq.addUserPerPartition(userName, tmpUser);
+      }
     }
 
     return orderedByPriority;
   }
   /*
    * Fifo+Priority based preemption policy need not preempt resources at the
-   * same priority level. Such cases will be validated out.
+   * same priority level. Such cases will be validated out. But if the demand
+   * is from an app of a different user, force preemption of resources even if
+   * the apps are at the same priority.
    */
   public void validateOutSameAppPriorityFromDemand(Resource cluster,
-      TreeSet<TempAppPerPartition> appsOrderedfromLowerPriority) {
+      TreeSet<TempAppPerPartition> orderedApps,
+      Map<String, TempUserPerPartition> usersPerPartition,
+      IntraQueuePreemptionOrderPolicy intraQueuePreemptionOrder) {
 
-    TempAppPerPartition[] apps = appsOrderedfromLowerPriority
-        .toArray(new TempAppPerPartition[appsOrderedfromLowerPriority.size()]);
+    TempAppPerPartition[] apps = orderedApps
+        .toArray(new TempAppPerPartition[orderedApps.size()]);
     if (apps.length <= 0) {
       return;
     }
 
-    int lPriority = 0;
-    int hPriority = apps.length - 1;
-
-    while (lPriority < hPriority
-        && !apps[lPriority].equals(apps[hPriority])
-        && apps[lPriority].getPriority() < apps[hPriority].getPriority()) {
-      Resource toPreemptFromOther = apps[hPriority]
-          .getToBePreemptFromOther();
-      Resource actuallyToPreempt = apps[lPriority].getActuallyToBePreempted();
-      Resource delta = Resources.subtract(apps[lPriority].toBePreempted,
-          actuallyToPreempt);
-
-      if (Resources.greaterThan(rc, cluster, delta, Resources.none())) {
-        Resource toPreempt = Resources.min(rc, cluster,
-            toPreemptFromOther, delta);
-
-        apps[hPriority].setToBePreemptFromOther(
-            Resources.subtract(toPreemptFromOther, toPreempt));
-        apps[lPriority].setActuallyToBePreempted(
-            Resources.add(actuallyToPreempt, toPreempt));
-      }
-
-      if (Resources.lessThanOrEqual(rc, cluster,
-          apps[lPriority].toBePreempted,
-          apps[lPriority].getActuallyToBePreempted())) {
-        lPriority++;
-        continue;
-      }
-
-      if (Resources.equals(apps[hPriority].getToBePreemptFromOther(),
-          Resources.none())) {
-        hPriority--;
-        continue;
-      }
-    }
-  }
+    for (int hPriority = apps.length - 1; hPriority >= 0; hPriority--) {
+
+      // Check whether the high priority app with demand needs resources from
+      // another user.
+      if (Resources.greaterThan(rc, cluster,
+          apps[hPriority].getToBePreemptFromOther(), Resources.none())) {
+
+        // Given we have a demand from a high priority app, we can do a reverse
+        // scan from lower priority apps to select resources.
+        // Since idealAssigned of each app has considered user-limit, this
+        // logic will provide eventual consistency w.r.t. user-limit as well.
+        for (int lPriority = 0; lPriority < apps.length; lPriority++) {
+
+          // Check whether this app with demand needs resources from another
+          // user.
+          if (Resources.greaterThan(rc, cluster, apps[lPriority].toBePreempted,
+              Resources.none())) {
+
+            // If apps are of the same user and the priority is the same, skip.
+            if ((apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+                && (apps[lPriority].getPriority() >= apps[hPriority]
+                    .getPriority())) {
+              continue;
+            }
+
+            if (Resources.lessThanOrEqual(rc, cluster,
+                apps[lPriority].toBePreempted,
+                apps[lPriority].getActuallyToBePreempted())
+                || Resources.equals(apps[hPriority].getToBePreemptFromOther(),
+                    Resources.none())) {
+              continue;
+            }
+
+            // Ideally, if any application has a higher priority, it can force
+            // preemption of any lower priority app from any user. However, if
+            // the admin enforces user-limit over priority, the preemption
+            // module will not choose lower priority apps from users who have
+            // not yet met their user-limit.
+            TempUserPerPartition tmpUser = usersPerPartition
+                .get(apps[lPriority].getUser());
+            if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+                && (!tmpUser.isUserLimitReached(rc, cluster))
+                && (intraQueuePreemptionOrder
+                    .equals(IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST))) {
+              continue;
+            }
+
+            Resource toPreemptFromOther = apps[hPriority]
+                .getToBePreemptFromOther();
+            Resource actuallyToPreempt = apps[lPriority]
+                .getActuallyToBePreempted();
+
+            // A lower priority app could offer more resources to preempt, if
+            // multiple higher priority/under-served users need resources.
+            // After one iteration, we need to ensure that actuallyToPreempt is
+            // subtracted from the resource to preempt.
+            Resource preemptableFromLowerPriorityApp = Resources
+                .subtract(apps[lPriority].toBePreempted, actuallyToPreempt);
+
+            // In case of user-limit preemption, when apps are from different
+            // users and of the same priority, we will do user-limit preemption
+            // if there is demand from an app under its UL quota.
+            // However, this under-UL-quota app's demand may be larger. Still,
+            // we should ensure that we are not over-preempting: at most
+            // (user's used - UL quota) could be preempted.
+            if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser()))
+                && (apps[lPriority].getPriority() == apps[hPriority]
+                    .getPriority())
+                && tmpUser.isUserLimitReached(rc, cluster)) {
+
+              Resource deltaULQuota = Resources
+                  .subtract(tmpUser.getUsedDeductAM(), tmpUser.selected);
+              Resources.subtractFrom(deltaULQuota, tmpUser.getUserLimit());
+
+              if (tmpUser.isPreemptionQuotaForULDeltaDone()) {
+                deltaULQuota = Resources.createResource(0, 0);
+              }
+
+              if (Resources.lessThan(rc, cluster, deltaULQuota,
+                  preemptableFromLowerPriorityApp)) {
+                tmpUser.updatePreemptionQuotaForULDeltaAsDone(true);
+                preemptableFromLowerPriorityApp = deltaULQuota;
+              }
+            }
+
+            if (Resources.greaterThan(rc, cluster,
+                preemptableFromLowerPriorityApp, Resources.none())) {
+              Resource toPreempt = Resources.min(rc, cluster,
+                  toPreemptFromOther, preemptableFromLowerPriorityApp);
+
+              apps[hPriority].setToBePreemptFromOther(
+                  Resources.subtract(toPreemptFromOther, toPreempt));
+              apps[lPriority].setActuallyToBePreempted(
+                  Resources.add(actuallyToPreempt, toPreempt));
+            }
+          }
+        }
+      }
+    }
+  }
@@ -456,6 +557,40 @@ private Resource calculateUsedAMResourcesPerQueue(String partition,
       Resources.addTo(userAMResource, app.getAMResource(partition));
       Resources.addTo(amUsed, app.getAMResource(partition));
     }
 
     return amUsed;
   }
+
+  @Override
+  public boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app,
+      Resource clusterResource, Resource usedResource, RMContainer c) {
+    // Ensure the checks below:
+    // 1. This check must be done only when the preemption order is
+    //    USERLIMIT_FIRST.
+    // 2. By selecting container "c", check whether this user's resource usage
+    //    would go below its user-limit.
+    // 3. The user's used resource must be greater than its user-limit for any
+    //    containers to be skipped by this check. If the used resource is under
+    //    the user-limit, these containers of this user have to be preempted,
+    //    as the demand might be from high priority apps running under the
+    //    same user.
+    String partition = context.getScheduler()
+        .getSchedulerNode(c.getAllocatedNode()).getPartition();
+    TempQueuePerPartition tq = context.getQueueByPartition(app.getQueueName(),
+        partition);
+    TempUserPerPartition tmpUser = tq.getUsersPerPartition().get(app.getUser());
+
+    // Given user is not present, skip the check.
+    if (tmpUser == null) {
+      return false;
+    }
+
+    // For ideal resource computations, the user-limit was saved after
+    // subtracting the AM used resource in TempUser. Hence it has to be added
+    // back here for a complete check.
+    Resource userLimit = Resources.add(tmpUser.getUserLimit(), tmpUser.amUsed);
+
+    return Resources.lessThanOrEqual(rc, clusterResource,
+        Resources.subtract(usedResource, c.getAllocatedResource()), userLimit)
+        && context.getIntraQueuePreemptionOrderPolicy()
+            .equals(IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST);
+  }
 }
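In USERLIMIT_FIRST mode this guard stops the selector from taking a container whose removal would push its owner's usage to or below the user-limit. A small self-contained sketch of that decision, using plain longs (GB) instead of YARN Resource objects, purely for illustration:

    public class UserLimitGuardExample {
      // True when preempting a container of size 'container' would drop the
      // user's usage to or below its user-limit, i.e. the container must be
      // skipped (mirrors skipContainerBasedOnIntraQueuePolicy, memory-only).
      static boolean skip(long userUsage, long userLimit, long container) {
        return userUsage - container <= userLimit;
      }

      public static void main(String[] args) {
        long userLimit = 30;  // user-limit computed for this user
        long userUsage = 38;  // rolling usage, updated as containers are taken
        // 4 GB container: 38 - 4 = 34 > 30, still above UL, safe to preempt.
        System.out.println(skip(userUsage, userLimit, 4));   // false
        // 10 GB container: 38 - 10 = 28 <= 30, would breach UL, skip it.
        System.out.println(skip(userUsage, userLimit, 10));  // true
      }
    }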

IntraQueueCandidatesSelector.java

@@ -23,6 +23,7 @@
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrderPolicy;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;

@@ -31,8 +32,9 @@
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Comparator;
-import java.util.Iterator;
+import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -51,14 +53,14 @@ static class TAPriorityComparator
       Comparator<TempAppPerPartition> {
 
     @Override
-    public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) {
-      Priority p1 = Priority.newInstance(tq1.getPriority());
-      Priority p2 = Priority.newInstance(tq2.getPriority());
+    public int compare(TempAppPerPartition ta1, TempAppPerPartition ta2) {
+      Priority p1 = Priority.newInstance(ta1.getPriority());
+      Priority p2 = Priority.newInstance(ta2.getPriority());
 
       if (!p1.equals(p2)) {
         return p1.compareTo(p2);
       }
 
-      return tq1.getApplicationId().compareTo(tq2.getApplicationId());
+      return ta1.getApplicationId().compareTo(ta2.getApplicationId());
     }
   }
@@ -121,17 +123,27 @@ public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
         Map<String, Resource> resToObtainByPartition = fifoPreemptionComputePlugin
             .getResourceDemandFromAppsPerQueue(queueName, partition);
 
-        // 6. Based on the selected resource demand per partition, select
-        // containers with known policy from inter-queue preemption.
+        // The default preemption iterator considers only FIFO+priority. For
+        // user-limit preemption, it is possible that some lower priority apps
+        // need resources from a high priority app of another user. Hence use
+        // apps ordered by user-limit starvation as well.
+        Collection<FiCaSchedulerApp> apps = fifoPreemptionComputePlugin
+            .getPreemptableApps(queueName, partition);
+
+        // 6. Get user-limit to ensure that we do not preempt resources which
+        // would force a user's resources to come under its UL.
+        Map<String, Resource> rollingResourceUsagePerUser = new HashMap<>();
+        initializeUsageAndUserLimitForCompute(clusterResource, partition,
+            leafQueue, rollingResourceUsagePerUser);
+
+        // 7. Based on the selected resource demand per partition, select
+        // containers with known policy from inter-queue preemption.
         try {
           leafQueue.getReadLock().lock();
-          Iterator<FiCaSchedulerApp> desc = leafQueue.getOrderingPolicy()
-              .getPreemptionIterator();
-          while (desc.hasNext()) {
-            FiCaSchedulerApp app = desc.next();
-            preemptFromLeastStarvedApp(selectedCandidates, clusterResource,
-                totalPreemptedResourceAllowed, resToObtainByPartition,
-                leafQueue, app);
+          for (FiCaSchedulerApp app : apps) {
+            preemptFromLeastStarvedApp(leafQueue, app, selectedCandidates,
+                clusterResource, totalPreemptedResourceAllowed,
+                resToObtainByPartition, rollingResourceUsagePerUser);
           }
         } finally {
           leafQueue.getReadLock().unlock();
@@ -142,16 +154,30 @@ public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
     return selectedCandidates;
   }
 
-  private void preemptFromLeastStarvedApp(
+  private void initializeUsageAndUserLimitForCompute(Resource clusterResource,
+      String partition, LeafQueue leafQueue,
+      Map<String, Resource> rollingResourceUsagePerUser) {
+    for (String user : leafQueue.getAllUsers()) {
+      // Initialize the used resource of a given user for rolling computation.
+      rollingResourceUsagePerUser.put(user, Resources.clone(
+          leafQueue.getUser(user).getResourceUsage().getUsed(partition)));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Rolling resource usage for user:" + user + " is : "
+            + rollingResourceUsagePerUser.get(user));
+      }
+    }
+  }
+
+  private void preemptFromLeastStarvedApp(LeafQueue leafQueue,
+      FiCaSchedulerApp app,
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
       Resource clusterResource, Resource totalPreemptedResourceAllowed,
-      Map<String, Resource> resToObtainByPartition, LeafQueue leafQueue,
-      FiCaSchedulerApp app) {
+      Map<String, Resource> resToObtainByPartition,
+      Map<String, Resource> rollingResourceUsagePerUser) {
 
     // ToDo: Reuse reservation selector here.
 
-    List<RMContainer> liveContainers = new ArrayList<>(
-        app.getLiveContainers());
+    List<RMContainer> liveContainers = new ArrayList<>(app.getLiveContainers());
     sortContainers(liveContainers);
 
     if (LOG.isDebugEnabled()) {

@@ -160,6 +186,8 @@ private void preemptFromLeastStarvedApp(LeafQueue leafQueue,
           + totalPreemptedResourceAllowed);
     }
 
+    Resource rollingUsedResourcePerUser = rollingResourceUsagePerUser
+        .get(app.getUser());
    for (RMContainer c : liveContainers) {
 
       // if there is no demand, return.
@@ -184,12 +212,34 @@ private void preemptFromLeastStarvedApp(LeafQueue leafQueue,
         continue;
       }
 
-      // Try to preempt this container
-      CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain(
-          rc, preemptionContext, resToObtainByPartition, c, clusterResource,
-          selectedCandidates, totalPreemptedResourceAllowed);
-    }
+      // If the selected container would bring resource usage under (or equal
+      // to) its user's UserLimit, we must skip such containers.
+      if (fifoPreemptionComputePlugin.skipContainerBasedOnIntraQueuePolicy(app,
+          clusterResource, rollingUsedResourcePerUser, c)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(
+              "Skipping container: " + c.getContainerId() + " with resource:"
+                  + c.getAllocatedResource() + " as UserLimit for user:"
+                  + app.getUser() + " with resource usage: "
+                  + rollingUsedResourcePerUser + " is going under UL");
+        }
+        break;
+      }
+
+      // Try to preempt this container
+      boolean ret = CapacitySchedulerPreemptionUtils
+          .tryPreemptContainerAndDeductResToObtain(rc, preemptionContext,
+              resToObtainByPartition, c, clusterResource, selectedCandidates,
+              totalPreemptedResourceAllowed);
+
+      // Subtract from the respective user's resource usage once a container
+      // is selected for preemption.
+      if (ret && preemptionContext.getIntraQueuePreemptionOrderPolicy()
+          .equals(IntraQueuePreemptionOrderPolicy.USERLIMIT_FIRST)) {
+        Resources.subtractFrom(rollingUsedResourcePerUser,
+            c.getAllocatedResource());
+      }
+    }
   }
 
   private void computeIntraQueuePreemptionDemand(Resource clusterResource,
@@ -205,12 +255,7 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource,
         continue;
       }
 
-      // 2. Its better to get partition based resource limit earlier before
-      // starting calculation
-      Resource partitionBasedResource =
-          context.getPartitionResource(partition);
-
-      // 3. loop through all queues corresponding to a partition.
+      // 2. loop through all queues corresponding to a partition.
       for (String queueName : queueNames) {
         TempQueuePerPartition tq = context.getQueueByPartition(queueName,
             partition);

@@ -221,23 +266,22 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource,
           continue;
         }
 
-        // 4. Consider reassignableResource as (used - actuallyToBePreempted).
+        // 3. Consider reassignableResource as (used - actuallyToBePreempted).
         // This provides an upper limit to split apps' quota in a queue.
         Resource queueReassignableResource = Resources.subtract(tq.getUsed(),
             tq.getActuallyToBePreempted());
 
-        // 5. Check queue's used capacity. Make sure that the used capacity is
+        // 4. Check queue's used capacity. Make sure that the used capacity is
         // above a certain limit to consider for intra-queue preemption.
         if (leafQueue.getQueueCapacities().getUsedCapacity(partition) < context
             .getMinimumThresholdForIntraQueuePreemption()) {
           continue;
         }
 
-        // 6. compute the allocation of all apps based on queue's unallocated
+        // 5. compute the allocation of all apps based on queue's unallocated
         // capacity
         fifoPreemptionComputePlugin.computeAppsIdealAllocation(clusterResource,
-            partitionBasedResource, tq, selectedCandidates,
-            totalPreemptedResourceAllowed,
+            tq, selectedCandidates, totalPreemptedResourceAllowed,
             queueReassignableResource,
             context.getMaxAllowableLimitForIntraQueuePreemption());
       }
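The rolling-usage map seeded in initializeUsageAndUserLimitForCompute is what keeps the skip check cheap: each user's current usage is cloned once, then decremented as containers are selected, so every later check sees post-preemption usage without re-querying the scheduler. A simplified, self-contained sketch of that loop, using plain longs (GB) in place of Resource objects:

    import java.util.HashMap;
    import java.util.Map;

    public class RollingUsageExample {
      public static void main(String[] args) {
        // Rolling per-user usage (GB), seeded from the queue's usage records.
        Map<String, Long> rollingUsage = new HashMap<>();
        rollingUsage.put("alice", 40L);

        long[] containers = {8L, 8L, 8L};  // alice's containers, least important first
        long userLimit = 20L;

        for (long c : containers) {
          // Stop once taking one more container would put alice at or below her UL.
          if (rollingUsage.get("alice") - c <= userLimit) {
            System.out.println("stop: would breach user-limit");
            break;
          }
          rollingUsage.merge("alice", -c, Long::sum);  // container selected
          System.out.println("preempt container, rolling usage now "
              + rollingUsage.get("alice"));
        }
      }
    }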

IntraQueuePreemptionComputePlugin.java

@@ -18,12 +18,14 @@
 package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
 
+import java.util.Collection;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 
 interface IntraQueuePreemptionComputePlugin {

@@ -32,8 +34,14 @@ Map<String, Resource> getResourceDemandFromAppsPerQueue(String queueName,
       String partition);
 
   void computeAppsIdealAllocation(Resource clusterResource,
-      Resource partitionBasedResource, TempQueuePerPartition tq,
+      TempQueuePerPartition tq,
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
       Resource totalPreemptedResourceAllowed, Resource queueTotalUnassigned,
       float maxAllowablePreemptLimit);
+
+  Collection<FiCaSchedulerApp> getPreemptableApps(String queueName,
+      String partition);
+
+  boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app,
+      Resource clusterResource, Resource usedResource, RMContainer c);
 }

ProportionalCapacityPreemptionPolicy.java

@@ -22,6 +22,7 @@
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -79,6 +80,16 @@
  */
 public class ProportionalCapacityPreemptionPolicy
     implements SchedulingEditPolicy, CapacitySchedulerPreemptionContext {
+
+  /**
+   * IntraQueuePreemptionOrderPolicy defines the order in which intra-queue
+   * preemption considerations are applied; it can be configured by the admin.
+   */
+  @Unstable
+  public enum IntraQueuePreemptionOrderPolicy {
+    PRIORITY_FIRST, USERLIMIT_FIRST;
+  }
+
 private static final Log LOG =
     LogFactory.getLog(ProportionalCapacityPreemptionPolicy.class);
@@ -95,6 +106,7 @@ public class ProportionalCapacityPreemptionPolicy
   private float maxAllowableLimitForIntraQueuePreemption;
   private float minimumThresholdForIntraQueuePreemption;
+  private IntraQueuePreemptionOrderPolicy intraQueuePreemptionOrderPolicy;
 
   // Pointer to other RM components
   private RMContext rmContext;

@@ -190,6 +202,13 @@ public void init(Configuration config, RMContext context,
         CapacitySchedulerConfiguration.
         DEFAULT_INTRAQUEUE_PREEMPTION_MINIMUM_THRESHOLD);
 
+    intraQueuePreemptionOrderPolicy = IntraQueuePreemptionOrderPolicy
+        .valueOf(csConfig
+            .get(
+                CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
+                CapacitySchedulerConfiguration.DEFAULT_INTRAQUEUE_PREEMPTION_ORDER_POLICY)
+            .toUpperCase());
+
     rc = scheduler.getResourceCalculator();
     nlm = scheduler.getRMContext().getNodeLabelManager();

@@ -242,7 +261,6 @@ public synchronized void editSchedule() {
     }
   }
 
-  @SuppressWarnings("unchecked")
   private void preemptOrkillSelectedContainerAfterWait(
       Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
       long currentTime) {

@@ -652,4 +670,9 @@ public void addPartitionToUnderServedQueues(String queueName,
     }
     underServedQueues.add(queueName);
   }
+
+  @Override
+  public IntraQueuePreemptionOrderPolicy getIntraQueuePreemptionOrderPolicy() {
+    return intraQueuePreemptionOrderPolicy;
+  }
 }
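The order policy is resolved once at init time: the configured string is upper-cased and mapped onto the enum, so "userlimit_first" and "USERLIMIT_FIRST" are both accepted. A minimal standalone illustration of the same parsing pattern (the class and variable names here are invented for the sketch):

    public class OrderPolicyParseExample {
      enum IntraQueuePreemptionOrderPolicy { PRIORITY_FIRST, USERLIMIT_FIRST }

      public static void main(String[] args) {
        // Value as an admin would write it in configuration; the shipped
        // default is "userlimit_first".
        String configured = "userlimit_first";
        IntraQueuePreemptionOrderPolicy policy =
            IntraQueuePreemptionOrderPolicy.valueOf(configured.toUpperCase());
        System.out.println(policy);  // USERLIMIT_FIRST
        // An unknown value such as "fifo_first" would throw
        // IllegalArgumentException here, surfacing the misconfiguration at
        // policy init time rather than during preemption.
      }
    }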

TempAppPerPartition.java

@@ -91,8 +91,12 @@ public ApplicationId getApplicationId() {
     return applicationId;
   }
 
+  public String getUser() {
+    return this.app.getUser();
+  }
+
   public void deductActuallyToBePreempted(ResourceCalculator resourceCalculator,
-      Resource cluster, Resource toBeDeduct, String partition) {
+      Resource cluster, Resource toBeDeduct) {
     if (Resources.greaterThan(resourceCalculator, cluster,
         getActuallyToBePreempted(), toBeDeduct)) {
       Resources.subtractFrom(getActuallyToBePreempted(), toBeDeduct);

TempQueuePerPartition.java

@@ -26,6 +26,8 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.Map;
 
 /**
  * Temporary data-structure tracking resource availability, pending resource

@@ -59,6 +61,10 @@ public class TempQueuePerPartition extends AbstractPreemptionEntity {
   int relativePriority = 0;
   TempQueuePerPartition parent = null;
 
+  // This holds a temp user data structure per user, tracking user-limit,
+  // idealAssigned, used etc.
+  Map<String, TempUserPerPartition> usersPerPartition = new LinkedHashMap<>();
+
   TempQueuePerPartition(String queueName, Resource current,
       boolean preemptionDisabled, String partition, Resource killable,
       float absCapacity, float absMaxCapacity, Resource totalPartitionResource,

@@ -289,4 +295,12 @@ public Collection<TempAppPerPartition> getApps() {
     return apps;
   }
+
+  public void addUserPerPartition(String userName,
+      TempUserPerPartition tmpUser) {
+    this.usersPerPartition.put(userName, tmpUser);
+  }
+
+  public Map<String, TempUserPerPartition> getUsersPerPartition() {
+    return usersPerPartition;
+  }
 }
} }

TempUserPerPartition.java (new file)

@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+/**
+ * Temporary data-structure tracking resource availability, pending resource
+ * need and current utilization for a user.
+ */
+public class TempUserPerPartition extends AbstractPreemptionEntity {
+
+  private final User user;
+  private Resource userLimit;
+  private boolean donePreemptionQuotaForULDelta = false;
+
+  TempUserPerPartition(User user, String queueName, Resource usedPerPartition,
+      Resource amUsedPerPartition, Resource reserved,
+      Resource pendingPerPartition) {
+    super(queueName, usedPerPartition, amUsedPerPartition, reserved,
+        pendingPerPartition);
+    this.user = user;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(" NAME: " + getUserName()).append(" CUR: ").append(getUsed())
+        .append(" PEN: ").append(pending).append(" RESERVED: ").append(reserved)
+        .append(" AM_USED: ").append(amUsed).append(" USER_LIMIT: ")
+        .append(getUserLimit()).append(" IDEAL_ASSIGNED: ")
+        .append(idealAssigned).append(" USED_WO_AMUSED: ")
+        .append(getUsedDeductAM()).append(" IDEAL_PREEMPT: ")
+        .append(toBePreempted).append(" ACTUAL_PREEMPT: ")
+        .append(getActuallyToBePreempted()).append("\n");
+    return sb.toString();
+  }
+
+  public String getUserName() {
+    return user.getUserName();
+  }
+
+  public Resource getUserLimit() {
+    return userLimit;
+  }
+
+  public void setUserLimit(Resource userLimitResource) {
+    this.userLimit = userLimitResource;
+  }
+
+  public boolean isUserLimitReached(ResourceCalculator rc,
+      Resource clusterResource) {
+    if (Resources.greaterThan(rc, clusterResource, getUsedDeductAM(),
+        userLimit)) {
+      return true;
+    }
+    return false;
+  }
+
+  public boolean isPreemptionQuotaForULDeltaDone() {
+    return this.donePreemptionQuotaForULDelta;
+  }
+
+  public void updatePreemptionQuotaForULDeltaAsDone(boolean done) {
+    this.donePreemptionQuotaForULDelta = done;
+  }
+}
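Note that the user-limit check deliberately compares usage net of AM resource against the stored (AM-deducted) user-limit, so both sides of the comparison exclude AM usage. A toy illustration of that invariant, using plain longs instead of Resource objects and invented values:

    public class UserLimitReachedExample {
      public static void main(String[] args) {
        long used = 36;      // user's total usage, including AM containers
        long amUsed = 4;     // resource held by the user's AM containers
        long rawUserLimit = 30;

        // TempUserPerPartition stores the user-limit with AM usage deducted...
        long storedUserLimit = rawUserLimit - amUsed;          // 26
        // ...and isUserLimitReached compares usage-minus-AM against it.
        boolean reached = (used - amUsed) > storedUserLimit;   // 32 > 26
        System.out.println(reached);  // true: user is over its limit
      }
    }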

CapacitySchedulerConfiguration.java

@@ -1233,6 +1233,14 @@ public boolean getLazyPreemptionEnabled() {
   public static final float DEFAULT_INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT =
       0.2f;
 
+  /**
+   * For intra-queue preemption, enforce a preemption order such as
+   * "userlimit_first" or "priority_first".
+   */
+  public static final String INTRAQUEUE_PREEMPTION_ORDER_POLICY = PREEMPTION_CONFIG_PREFIX
+      + INTRA_QUEUE_PREEMPTION_CONFIG_PREFIX + "preemption-order-policy";
+
+  public static final String DEFAULT_INTRAQUEUE_PREEMPTION_ORDER_POLICY = "userlimit_first";
+
 /**
  * Maximum applications for a queue, used when applications per queue is
  * not defined. To be consistent with the previous version, the default value is set
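For completeness, a sketch of how an admin-facing configuration would set the new knob programmatically. The full property name shown is an assumption, derived from PREEMPTION_CONFIG_PREFIX and INTRA_QUEUE_PREEMPTION_CONFIG_PREFIX keeping their existing values:

    import org.apache.hadoop.conf.Configuration;

    public class OrderPolicyConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumed full key after prefix concatenation.
        String key = "yarn.resourcemanager.monitor.capacity.preemption."
            + "intra-queue-preemption.preemption-order-policy";
        // Flip from the default "userlimit_first" to priority-driven ordering.
        conf.set(key, "priority_first");
        System.out.println(conf.get(key));
      }
    }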

LeafQueue.java

@@ -43,12 +43,10 @@
 import org.apache.hadoop.yarn.api.records.QueueState;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.security.AccessType;
-import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;

@@ -56,7 +54,6 @@
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.*;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant;

@@ -2022,4 +2019,12 @@ public void stopQueue() {
       writeLock.unlock();
     }
   }
+
+  /**
+   * Get all valid users in this queue.
+   * @return user list
+   */
+  public Set<String> getAllUsers() {
+    return this.getUsersManager().getUsers().keySet();
+  }
 }
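getAllUsers is what feeds the rolling-usage initialization shown earlier in IntraQueueCandidatesSelector. A sketch of that seeding in isolation; the queue and usage objects are mocked as plain collections, purely illustrative:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class SeedRollingUsageExample {
      public static void main(String[] args) {
        // Stand-ins for leafQueue.getAllUsers() and per-user used resource (GB).
        Set<String> allUsers = new HashSet<>(Arrays.asList("alice", "bob"));
        Map<String, Long> usedByUser = new HashMap<>();
        usedByUser.put("alice", 40L);
        usedByUser.put("bob", 12L);

        // Mirrors initializeUsageAndUserLimitForCompute: copy each user's
        // current usage so preemption decisions can decrement the copy safely.
        Map<String, Long> rollingResourceUsagePerUser = new HashMap<>();
        for (String user : allUsers) {
          rollingResourceUsagePerUser.put(user, usedByUser.get(user));
        }
        System.out.println(rollingResourceUsagePerUser);
      }
    }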

UsersManager.java

@@ -253,6 +253,15 @@ public Resource getUserResourceLimit() {
     public void setUserResourceLimit(Resource userResourceLimit) {
       this.userResourceLimit = userResourceLimit;
     }
+
+    public String getUserName() {
+      return this.userName;
+    }
+
+    @VisibleForTesting
+    public void setResourceUsage(ResourceUsage resourceUsage) {
+      this.userResourceUsage = resourceUsage;
+    }
   } /* End of User class */
 
   /**

@@ -344,7 +353,7 @@ public void userLimitNeedsRecompute() {
   /*
    * Get all users of queue.
    */
-  private Map<String, User> getUsers() {
+  public Map<String, User> getUsers() {
     return users;
   }

ProportionalCapacityPreemptionPolicyMockFramework.java

@ -45,6 +45,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
@ -96,6 +97,7 @@ public class ProportionalCapacityPreemptionPolicyMockFramework {
Clock mClock = null; Clock mClock = null;
CapacitySchedulerConfiguration conf = null; CapacitySchedulerConfiguration conf = null;
CapacityScheduler cs = null; CapacityScheduler cs = null;
@SuppressWarnings("rawtypes")
EventHandler<Event> mDisp = null; EventHandler<Event> mDisp = null;
ProportionalCapacityPreemptionPolicy policy = null; ProportionalCapacityPreemptionPolicy policy = null;
Resource clusterResource = null; Resource clusterResource = null;
@ -247,6 +249,7 @@ public Integer answer(InvocationOnMock invocation) throws Throwable {
if (containerId == 1) {
when(rmc.isAMContainer()).thenReturn(true);
when(app.getAMResource(label)).thenReturn(res);
when(app.getAppAMNodePartitionName()).thenReturn(label);
}
if (reserved) {
@ -280,6 +283,12 @@ public Integer answer(InvocationOnMock invocation) throws Throwable {
containerId++;
}
// If the app has 0 containers and only pending resources, still make sure
// to update the label.
if (repeat == 0) {
when(app.getAppAMNodePartitionName()).thenReturn(label);
}
// Some more app specific aggregated data can be better filled here.
when(app.getPriority()).thenReturn(pri);
when(app.getUser()).thenReturn(userName);
@ -315,10 +324,15 @@ public Integer answer(InvocationOnMock invocation) throws Throwable {
private void mockApplications(String appsConfig) {
int id = 1;
HashMap<String, HashSet<String>> userMap = new HashMap<String, HashSet<String>>();
HashMap<String, HashMap<String, HashMap<String, ResourceUsage>>> userResourceUsagePerLabel = new HashMap<>();
LeafQueue queue = null;
int mulp = -1;
for (String a : appsConfig.split(";")) {
String[] strs = a.split("\t");
String queueName = strs[0];
if (mulp <= 0 && strs.length > 2 && strs[2] != null) {
mulp = 100 / (new Integer(strs[2]).intValue());
}
// get containers
List<RMContainer> liveContainers = new ArrayList<RMContainer>();
@ -338,6 +352,7 @@ private void mockApplications(String appsConfig) {
when(app.getReservedContainers()).thenReturn(reservedContainers);
when(app.getApplicationAttemptId()).thenReturn(appAttemptId);
when(app.getApplicationId()).thenReturn(appId);
when(app.getQueueName()).thenReturn(queueName);
// add to LeafQueue
queue = (LeafQueue) nameToCSQueues.get(queueName);
@ -349,23 +364,71 @@ private void mockApplications(String appsConfig) {
users = new HashSet<String>();
userMap.put(queueName, users);
}
users.add(app.getUser());
String label = app.getAppAMNodePartitionName();
// Get label to queue
HashMap<String, HashMap<String, ResourceUsage>> userResourceUsagePerQueue = userResourceUsagePerLabel
.get(label);
if (null == userResourceUsagePerQueue) {
userResourceUsagePerQueue = new HashMap<>();
userResourceUsagePerLabel.put(label, userResourceUsagePerQueue);
}
// Get queue to user based resource map
HashMap<String, ResourceUsage> userResourceUsage = userResourceUsagePerQueue
.get(queueName);
if (null == userResourceUsage) {
userResourceUsage = new HashMap<>();
userResourceUsagePerQueue.put(queueName, userResourceUsage);
}
// Get user to its resource usage.
ResourceUsage usage = userResourceUsage.get(app.getUser());
if (null == usage) {
usage = new ResourceUsage();
userResourceUsage.put(app.getUser(), usage);
}
usage.incAMUsed(app.getAMResource(label));
usage.incUsed(app.getAppAttemptResourceUsage().getUsed(label));
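// Shape of the bookkeeping above (illustrative): label -> queue -> user ->
// ResourceUsage, e.g. {"" -> {"a" -> {"user1" -> usage}}}. The per-user
// mocks built below read from this nested map.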
id++;
}
for (String label : userResourceUsagePerLabel.keySet()) {
for (String queueName : userMap.keySet()) {
queue = (LeafQueue) nameToCSQueues.get(queueName);
// Currently we have user-limit test support only for default label.
Resource totResoucePerPartition = partitionToResource.get("");
Resource capacity = Resources.multiply(totResoucePerPartition,
queue.getQueueCapacities().getAbsoluteCapacity());
HashSet<String> users = userMap.get(queue.getQueueName());
when(queue.getAllUsers()).thenReturn(users);
Resource userLimit;
if (mulp > 0) {
userLimit = Resources.divideAndCeil(rc, capacity, mulp);
} else {
userLimit = Resources.divideAndCeil(rc, capacity,
users.size());
}
LOG.debug("Updating user-limit from mock: totResoucePerPartition="
+ totResoucePerPartition + ", capacity=" + capacity
+ ", users.size()=" + users.size() + ", userlimit= " + userLimit
+ ",label= " + label + ",queueName= " + queueName);
HashMap<String, ResourceUsage> userResourceUsage = userResourceUsagePerLabel
.get(label).get(queueName);
for (String userName : users) {
User user = new User(userName);
if (userResourceUsage != null) {
user.setResourceUsage(userResourceUsage.get(userName));
}
when(queue.getUser(eq(userName))).thenReturn(user);
when(queue.getResourceLimitForAllUsers(eq(userName),
any(Resource.class), anyString(), any(SchedulingMode.class)))
.thenReturn(userLimit);
}
}
}
}
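// Worked example of the mocked user-limit above: with a queue capacity of
// 100 and a MULP column of 50 in appsConfig, mulp = 100 / 50 = 2, so
// userLimit = divideAndCeil(capacity, mulp) = 50. Without a MULP column and
// two active users, userLimit = divideAndCeil(100, 2) = 50 as well.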

View File

@ -62,12 +62,16 @@ public void testSimpleIntraQueuePreemption() throws IOException {
* Apps which are running at low priority (4) will have a few of their
* resources preempted to meet the demand.
*/
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;"; String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label String nodesConfig = // n1 has no label
"n1= res=100"; "n1= res=100";
String queuesConfig = String queuesConfig =
// guaranteed,max,used,pending,reserved // guaranteed,max,used,pending,reserved
"root(=[100 100 80 120 0]);" + // root "root(=[100 100 79 120 0]);" + // root
"-a(=[11 100 11 50 0]);" + // a "-a(=[11 100 11 50 0]);" + // a
"-b(=[40 100 38 60 0]);" + // b "-b(=[40 100 38 60 0]);" + // b
"-c(=[20 100 10 10 0]);" + // c "-c(=[20 100 10 10 0]);" + // c
@ -304,6 +308,8 @@ public void testLimitPreemptionWithMaxIntraQueuePreemptableLimit()
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@ -357,6 +363,8 @@ public void testLimitPreemptionWithTotalPreemptedResourceAllowed()
// report "ideal" preempt as 10%. Ensure preemption happens only for 10% // report "ideal" preempt as 10%. Ensure preemption happens only for 10%
conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND, conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
(float) 0.1); (float) 0.1);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;"; String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label String nodesConfig = // n1 has no label
@ -411,6 +419,8 @@ public void testAlreadySelectedContainerFromInterQueuePreemption()
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@ -418,7 +428,7 @@ public void testAlreadySelectedContainerFromInterQueuePreemption()
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 95 170 0]);" + // root
"-a(=[60 100 70 35 0]);" + // a
"-b(=[40 100 25 120 0])"; // b
String appsConfig =
@ -467,6 +477,8 @@ public void testSkipAMContainersInInterQueuePreemption() throws IOException {
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@ -516,6 +528,8 @@ public void testSkipAMContainersInInterQueuePreemptionSingleApp()
* cycle. Even though there is more demand and no other low priority
* apps are present, the AM container still needs to be spared.
*/
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@ -660,6 +674,8 @@ public void testNodePartitionIntraQueuePreemption() throws IOException {
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;" + // default partition
"x=100,true"; // partition=x
@ -720,6 +736,8 @@ public void testComplexIntraQueuePreemption() throws IOException {
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
@ -840,8 +858,10 @@ public void testIntraQueuePreemptionWithTwoUsers()
policy.editSchedule();
// Considering user-limit of 50% since only 2 users are there, only preempt
// 14 more (5 is already running) even though demand is for 30. Ideally we
// must preempt 15, but the 15th container would bring user1's usage to 20,
// which is the same as the user-limit. Hence skip the 15th container.
verify(mDisp, times(14)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
}
@ -869,6 +889,8 @@ public void testComplexNodePartitionIntraQueuePreemption()
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100,true;" + // default partition
"x=100,true"; // partition=x

View File

@ -0,0 +1,899 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
/**
* Test class for IntraQueuePreemption scenarios.
*/
public class TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit
extends
ProportionalCapacityPreemptionPolicyMockFramework {
@Before
public void setup() {
super.setup();
conf.setBoolean(
CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ENABLED, true);
policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock);
}
@Test
public void testSimpleIntraQueuePreemptionWithTwoUsers()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Preconditions:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 100  | 0       |
* | app2 | user2 | 1        | 0    | 30      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. app1 has a demand
* of 0 and is already using 100. app2 from user2 has a demand of 30, and
* its UL is 50. Hence 30 would be preempted from app1.
*/
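// Worked numbers for the scenario above (illustrative): UL = 100 * 50% = 50
// per user. user1 holds 100, i.e. 50 over its UL; user2 demands 30, well
// within its UL of 50. The policy is therefore expected to select
// min(overage, demand) = min(50, 30) = 30 containers from app1.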
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 30 0]);" + // root
"-a(=[100 100 100 30 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,100,false,0,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,0,false,30,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources and is well under its user-limit. Hence preempt
// resources from app1.
verify(mDisp, times(30)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
@Test
public void testNoIntraQueuePreemptionWithSingleUser()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 100  | 0       |
* | app2 | user1 | 1        | 0    | 30      |
* +------+-------+----------+------+---------+
* Given a single user, lower priority/late submitted apps have to
* wait.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 30 0]);" + // root
"-a(=[100 100 100 30 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,100,false,0,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,0,false,30,user1)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources. Since app1 and app2 are from the same user,
// there won't be any preemption.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
@Test
public void testNoIntraQueuePreemptionWithTwoUserUnderUserLimit()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 50   | 0       |
* | app2 | user2 | 1        | 30   | 30      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. app1 has a demand
* of 0 and is already using 50. app2 from user2 has a demand of 30, and
* its UL is 50. Since app1 is within its UL, there should not be any
* preemption.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 80 30 0]);" + // root
"-a(=[100 100 80 30 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,50,false,0,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,30,false,30,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources. Since both users stay within their
// user-limit, there won't be any preemption.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
@Test
public void testSimpleIntraQueuePreemptionWithTwoUsersWithAppPriority()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 2        | 100  | 0       |
* | app2 | user2 | 1        | 0    | 30      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. app1 of higher
* priority has a demand of 0 and is already using 100. app2 from user2 has
* a demand of 30, and its UL is 50. Hence 30 would be preempted from app1.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 30 0]);" + // root
"-a(=[100 100 100 30 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(2,1,n1,,100,false,0,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,0,false,30,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources and is well under its user-limit. Hence preempt
// resources from app1 even though its priority is higher than app2's.
verify(mDisp, times(30)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
@Test
public void testIntraQueuePreemptionOfUserLimitWithMultipleApps()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 30   | 30      |
* | app2 | user2 | 1        | 20   | 20      |
* | app3 | user1 | 1        | 30   | 30      |
* | app4 | user2 | 1        | 0    | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 80 90 0]);" + // root
"-a(=[100 100 80 90 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,30,false,30,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,20,false,20,user2);" +
"a\t" // app3 in a
+ "(1,1,n1,,30,false,30,user1);" +
"a\t" // app4 in a
+ "(1,1,n1,,0,false,10,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2/app4 need more resources and are well under their user-limit. Hence
// preempt resources from user1's apps; app3 is chosen over app1 as it was
// submitted later.
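// Numbers (illustrative): user1 uses 60 (app1 30 + app3 30), 10 above its
// UL of 50; one container is kept back as the user-limit deadzone, so 9
// containers are preempted from app3.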
verify(mDisp, times(9)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
}
@Test
public void testNoPreemptionOfUserLimitWithMultipleAppsAndSameUser()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 30   | 30      |
* | app2 | user1 | 1        | 20   | 20      |
* | app3 | user1 | 1        | 30   | 30      |
* | app4 | user1 | 1        | 0    | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 80 90 0]);" + // root
"-a(=[100 100 80 90 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,30,false,20,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,20,false,20,user1);" +
"a\t" // app3 in a
+ "(1,1,n1,,30,false,30,user1);" +
"a\t" // app4 in a
+ "(1,1,n1,,0,false,10,user1)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// All four apps belong to user1, so user-limit preemption has nothing to
// rebalance; no containers should be preempted from any app.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(4))));
}
@Test
public void testIntraQueuePreemptionOfUserLimitWitAppsOfDifferentPriority()
throws IOException {
/**
* Queue structure is:
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 3        | 30   | 30      |
* | app2 | user2 | 1        | 20   | 20      |
* | app3 | user1 | 4        | 30   | 0       |
* | app4 | user2 | 1        | 0    | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps.
*/
// Set max preemption limit as 50%.
conf.setFloat(
CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 80 60 0]);" + // root
"-a(=[100 100 80 60 0])"; // b
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(3,1,n1,,30,false,30,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,20,false,20,user2);" + "a\t" // app3 in a
+ "(4,1,n1,,30,false,0,user1);" + "a\t" // app4 in a
+ "(1,1,n1,,0,false,10,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2/app4 need more resources and are well under their user-limit. Hence
// preempt resources from app1 (compared to app3, app1 has lower priority).
verify(mDisp, times(9)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
@Test
public void testIntraQueuePreemptionOfUserLimitInTwoQueues()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* / \
* a b
* </pre>
*
* Guaranteed resources of a/b are 40:60. Total cluster resource = 100.
* maxIntraQueuePreemptableLimit by default is 50%. This test verifies that
* intra-queue preemption can occur in two queues when a user-limit
* irregularity is present in a queue.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 90 80 0]);" + // root
"-a(=[60 100 55 60 0]);" + // a
"-b(=[40 100 35 20 0])"; // b
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(3,1,n1,,20,false,30,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,20,false,20,user2);" +
"a\t" // app3 in a
+ "(4,1,n1,,15,false,0,user1);" +
"a\t" // app4 in a
+ "(1,1,n1,,0,false,10,user2);" +
"b\t" // app5 in b
+ "(3,1,n1,,25,false,10,user1);" +
"b\t" // app6 in b
+ "(1,1,n1,,10,false,10,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2/app4 need more resources and are well under their user-limit. Hence
// preempt resources from app1 in queue a (compared to app3, app1 has lower
// priority) and from app5 in queue b.
verify(mDisp, times(4)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(4)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(5))));
}
@Test
public void testIntraQueuePreemptionWithTwoRequestingUsers()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 60   | 10      |
* | app2 | user2 | 1        | 40   | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 20 0]);" + // root
"-a(=[100 100 100 20 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,1,n1,,60,false,10,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,40,false,10,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources and is well under its user-limit. Hence preempt
// resources from app1.
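// Numbers (illustrative): user1 is 10 above its UL of 50; the last
// container is kept back as the user-limit deadzone, so 9 are preempted.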
verify(mDisp, times(9)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
}
@Test
public void testNoIntraQueuePreemptionIfBelowUserLimitAndLowPriorityExtraUsers()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Preconditions:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 50   | 0       |
* | app2 | user2 | 1        | 50   | 0       |
* | app3 | user3 | 0        | 0    | 10      |
* +------+-------+----------+------+---------+
* This scenario should never preempt from either user1 or user2
*/
// Set max allowable intra-queue preemption limit to 70% (this is different
// from minimum user limit percent).
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.7);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 10 0]);" + // root
"-a(=[100 100 100 10 0])"; // a
String appsConfig =
// queueName\t\
// (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP;
"a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1
"a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2
"a\t(0,1,n1,,0,false,10,user3)\t50"; // app3, user3
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// user1 and user2 are both exactly at their user-limit; preempting from
// them would drop them below it, so nothing should be preempted.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
}
@Test
public void testNoIntraQueuePreemptionIfBelowUserLimitAndSamePriorityExtraUsers()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Preconditions:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 50   | 0       |
* | app2 | user2 | 1        | 50   | 0       |
* | app3 | user3 | 1        | 0    | 10      |
* +------+-------+----------+------+---------+
* This scenario should never preempt from either user1 or user2
*/
// Set max allowable intra-queue preemption limit to 70% (this is different
// from minimum user limit percent).
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.7);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 10 0]);" + // root
"-a(=[100 100 100 10 0])"; // a
String appsConfig =
// queueName\t\
// (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP;
"a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1
"a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2
"a\t(1,1,n1,,0,false,10,user3)\t50"; // app3, user3
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// user1 and user2 are both exactly at their user-limit; preempting from
// them would drop them below it, so nothing should be preempted.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
}
@Test
public void testNoIntraQueuePreemptionIfBelowUserLimitAndHighPriorityExtraUsers()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Preconditions:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 50   | 0       |
* | app2 | user2 | 1        | 50   | 0       |
* | app3 | user3 | 5        | 0    | 10      |
* +------+-------+----------+------+---------+
* This scenario should never preempt from either user1 or user2
*/
// Set max allowable intra-queue preemption limit to 70% (this is different
// from minimum user limit percent).
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.7);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 10 0]);" + // root
"-a(=[100 100 100 10 0])"; // a
String appsConfig =
// queueName\t\
// (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP;
"a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1
"a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2
"a\t(5,1,n1,,0,false,10,user3)\t50"; // app3, user3
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// user1 and user2 are both exactly at their user-limit; preempting from
// them would drop them below it, so nothing should be preempted.
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
}
@Test
public void testNoIntraQueuePreemptionWithUserLimitDeadzone()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 60   | 10      |
* | app2 | user2 | 1        | 40   | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps, but also ensure that a user's
* usage does not drop below its user-limit.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 20 0]);" + // root
"-a(=[100 100 100 20 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,3,n1,,20,false,10,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,40,false,10,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 needs more resources and is well under its user-limit. Hence preempt
// 3 containers (9GB) from app1. We will not preempt the last container as
// that may pull user1's usage below its user-limit.
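// Deadzone arithmetic for the comment above: app1 runs 20 containers of
// size 3 = 60 used, UL = 50. Preempting 3 containers leaves 60 - 9 = 51,
// still above the UL; a 4th would leave 48 < 50, dipping below the
// user-limit, hence only 3 containers are selected.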
verify(mDisp, times(3)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
}
@Test
public void testIntraQueuePreemptionWithUserLimitDeadzoneAndPriority()
throws IOException {
/**
* Queue structure is:
*
* <pre>
* root
* |
* a
* </pre>
*
* Scenario:
* Queue total resources: 100
* Minimum user limit percent: 50%
* +------+-------+----------+------+---------+
* | APP  | USER  | PRIORITY | USED | PENDING |
* +------+-------+----------+------+---------+
* | app1 | user1 | 1        | 60   | 10      |
* | app2 | user2 | 1        | 40   | 10      |
* +------+-------+----------+------+---------+
* Hence in queueA of 100, each user has a quota of 50. Now have multiple
* apps and check for preemption across apps, but also ensure that a user's
* usage does not drop below its user-limit.
*/
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100,true;";
String nodesConfig = // n1 has no label
"n1= res=100";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100 100 100 20 0]);" + // root
"-a(=[100 100 100 20 0])"; // a
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,3,n1,,20,false,10,user1);" + // app1 a
"a\t" // app2 in a
+ "(2,1,n1,,0,false,10,user1);" + // app1 a
"a\t" // app2 in a
+ "(1,1,n1,,40,false,20,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app3 (user2) needs more resources and is well under its user-limit. Hence
// preempt 3 containers (9GB) from app1. We will not preempt the last
// container as that may pull user1's usage below its user-limit.
verify(mDisp, times(3)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
verify(mDisp, times(0)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(2))));
// After first round, 3 containers were preempted from app1 and resource
// distribution will be like below.
appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,pending)
"a\t" // app1 in a
+ "(1,3,n1,,17,false,10,user1);" + // app1 a
"a\t" // app2 in a
+ "(2,1,n1,,0,false,10,user1);" + // app2 a
"a\t" // app2 in a
+ "(1,1,n1,,49,false,11,user2)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// app2 has higher priority demand within the same user 'user1'. However
// user1's usage is already under its UL. Hence no further preemption. We
// will still see 3 containers while asserting, as they were already
// selected in the earlier round.
verify(mDisp, times(3)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(1))));
}
}

View File

@ -0,0 +1,178 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.mockito.Matchers.argThat;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
* Test class for IntraQueuePreemption scenarios.
*/
public class TestProportionalCapacityPreemptionPolicyIntraQueueWithDRF
extends
ProportionalCapacityPreemptionPolicyMockFramework {
@Before
public void setup() {
super.setup();
conf.setBoolean(
CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ENABLED, true);
rc = new DominantResourceCalculator();
when(cs.getResourceCalculator()).thenReturn(rc);
policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock);
}
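// With DominantResourceCalculator, comparisons use the dominant share over
// <memory:vcores>. For example, in a 100:200 cluster a usage of 40:30 has
// dominant share max(40/100, 30/200) = 40%, i.e. memory-dominant.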
@Test
public void testSimpleIntraQueuePreemptionWithVCoreResource()
throws IOException {
/**
* The simplest test preemption, Queue structure is:
*
* <pre>
* root
* / | | \
* a b c d
* </pre>
*
* Guaranteed resources of a/b/c/d are 10:40:20:30. Total cluster resource =
* 100. Scenario: Queue B has a few running apps and two high priority apps
* have demand. Apps which are running at low priority (4) will have a few
* of their resources preempted to meet the demand.
*/
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
String labelsConfig = "=100:200,true;";
String nodesConfig = // n1 has no label
"n1= res=100:200";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100:50 100:50 80:40 120:60 0]);" + // root
"-a(=[10:5 100:50 10:5 50:25 0]);" + // a
"-b(=[40:20 100:50 40:20 60:30 0]);" + // b
"-c(=[20:10 100:50 10:5 10:5 0]);" + // c
"-d(=[30:15 100:50 20:10 0 0])"; // d
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,
// pending)
"a\t" // app1 in a
+ "(1,1:1,n1,,5,false,25:25);" + // app1 a
"a\t" // app2 in a
+ "(1,1:1,n1,,5,false,25:25);" + // app2 a
"b\t" // app3 in b
+ "(4,1:1,n1,,36,false,20:20);" + // app3 b
"b\t" // app4 in b
+ "(4,1:1,n1,,2,false,10:10);" + // app4 b
"b\t" // app4 in b
+ "(5,1:1,n1,,1,false,10:10);" + // app5 b
"b\t" // app4 in b
+ "(6,1:1,n1,,1,false,10:10);" + // app6 in b
"c\t" // app1 in a
+ "(1,1:1,n1,,10,false,10:10);" + "d\t" // app7 in c
+ "(1,1:1,n1,,20,false,0)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// For queue B, app3 and app4 were of lower priority. Hence take 8
// containers from them by hitting the intraQueuePreemptionDemand of 20%.
verify(mDisp, times(1)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(4))));
verify(mDisp, times(7)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
}
@Test
public void testIntraQueuePreemptionWithDominantVCoreResource()
throws IOException {
/**
* The simplest test preemption, Queue structure is:
*
* <pre>
* root
* / \
* a b
* </pre>
*
* Guaranteed resources of a/b are 40:60. Total cluster resource = 100.
* Scenario: Queue B has a few running apps and two high priority apps have
* demand. Apps which are running at low priority (4) will have a few of
* their resources preempted to meet the demand.
*/
conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER_POLICY,
"priority_first");
// Set max preemption limit as 50%.
conf.setFloat(CapacitySchedulerConfiguration.
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT,
(float) 0.5);
String labelsConfig = "=100:200,true;";
String nodesConfig = // n1 has no label
"n1= res=100:200";
String queuesConfig =
// guaranteed,max,used,pending,reserved
"root(=[100:50 100:50 50:40 110:60 0]);" + // root
"-a(=[40:20 100:50 9:9 50:30 0]);" + // a
"-b(=[60:30 100:50 40:30 60:30 0]);"; // b
String appsConfig =
// queueName\t(priority,resource,host,expression,#repeat,reserved,
// pending)
"a\t" // app1 in a
+ "(1,2:1,n1,,4,false,25:25);" + // app1 a
"a\t" // app2 in a
+ "(1,1:3,n1,,2,false,25:25);" + // app2 a
"b\t" // app3 in b
+ "(4,2:1,n1,,10,false,20:20);" + // app3 b
"b\t" // app4 in b
+ "(4,1:2,n1,,5,false,10:10);" + // app4 b
"b\t" // app5 in b
+ "(5,1:1,n1,,5,false,30:20);" + // app5 b
"b\t" // app6 in b
+ "(6,2:1,n1,,5,false,30:20);";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
policy.editSchedule();
// For queue B, app3 and app4 were of lower priority, so containers are
// preempted from them first; app5 gives up a few as well.
verify(mDisp, times(9)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(3))));
verify(mDisp, times(4)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(4))));
verify(mDisp, times(4)).handle(argThat(
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
getAppAttemptId(5))));
}
}