diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index a5efd9f4c8..8de363140f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1301,6 +1301,10 @@ private CSAssignment allocateContainerOnSingleNode( if (reservedContainer != null) { FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer( reservedContainer.getContainerId()); + if (reservedApplication == null) { + LOG.error("Trying to schedule for a finished app, please double check."); + return null; + } // Try to fulfill the reservation LOG.info( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 86fcbc9afa..ac1a26ccef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1201,7 +1201,14 @@ public boolean accept(Resource cluster, allocation.getSchedulingMode(), null); // Deduct resources that we can release - Resource usedResource = Resources.clone(getUser(username).getUsed(p)); + User user = getUser(username); + if (user == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + username + " has been removed!"); + } + return false; + } + Resource usedResource = Resources.clone(user.getUsed(p)); Resources.subtractFrom(usedResource, request.getTotalReleasedResource()); @@ -1406,6 +1413,12 @@ Resource computeUserLimitAndSetHeadroom(FiCaSchedulerApp application, SchedulingMode schedulingMode, Resource userLimit) { String user = application.getUser(); User queueUser = getUser(user); + if (queueUser == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + user + " has been removed!"); + } + return Resources.none(); + } // Compute user limit respect requested labels, // TODO, need consider headroom respect labels also @@ -1500,6 +1513,12 @@ protected boolean canAssignToUser(Resource clusterResource, try { readLock.lock(); User user = getUser(userName); + if (user == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + userName + " has been removed!"); + } + return false; + } currentResourceLimits.setAmountNeededUnreserve(Resources.none()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index a4e0096c52..776a7e98af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -322,6 +322,11 @@ private boolean commonCheckContainerAllocation( RMContainer reservedContainerOnNode = schedulerContainer.getSchedulerNode().getReservedContainer(); if (reservedContainerOnNode != null) { + // adding NP check as this proposal could not be allocated from reserved + // container in async-scheduling mode + if (allocation.getAllocateFromReservedContainer() == null) { + return false; + } RMContainer fromReservedContainer = allocation.getAllocateFromReservedContainer().getRmContainer();