YARN-8193. YARN RM hangs abruptly (stops allocating resources) when running successive applications. (Zian Chen via wangda)

Change-Id: Ia83dd2499ee9000b9e09ae5a932f21a13c0ddee6
This commit is contained in:
Wangda Tan 2018-04-25 22:10:18 -07:00 committed by Owen O'Malley
parent 13d389bf51
commit 2a0fa50f9d

View File

@ -179,11 +179,22 @@ private ContainerAllocation preCheckForNodeCandidateSet(
// This is to make sure non-partitioned-resource-request will prefer // This is to make sure non-partitioned-resource-request will prefer
// to be allocated to non-partitioned nodes // to be allocated to non-partitioned nodes
int missedNonPartitionedRequestSchedulingOpportunity = 0; int missedNonPartitionedRequestSchedulingOpportunity = 0;
AppPlacementAllocator appPlacementAllocator =
appInfo.getAppPlacementAllocator(schedulerKey);
if (null == appPlacementAllocator){
// This is possible when #pending resource decreased by a different
// thread.
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, priority,
ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
String requestPartition =
appPlacementAllocator.getPrimaryRequestedNodePartition();
// Only do this when request associated with given scheduler key accepts // Only do this when request associated with given scheduler key accepts
// NO_LABEL under RESPECT_EXCLUSIVITY mode // NO_LABEL under RESPECT_EXCLUSIVITY mode
if (StringUtils.equals(RMNodeLabelsManager.NO_LABEL, if (StringUtils.equals(RMNodeLabelsManager.NO_LABEL, requestPartition)) {
appInfo.getAppPlacementAllocator(schedulerKey)
.getPrimaryRequestedNodePartition())) {
missedNonPartitionedRequestSchedulingOpportunity = missedNonPartitionedRequestSchedulingOpportunity =
application.addMissedNonPartitionedRequestSchedulingOpportunity( application.addMissedNonPartitionedRequestSchedulingOpportunity(
schedulerKey); schedulerKey);
@ -261,12 +272,9 @@ ContainerAllocation tryAllocateOnNode(Resource clusterResource,
return result; return result;
} }
public float getLocalityWaitFactor( public float getLocalityWaitFactor(int uniqAsks, int clusterNodes) {
SchedulerRequestKey schedulerKey, int clusterNodes) {
// Estimate: Required unique resources (i.e. hosts + racks) // Estimate: Required unique resources (i.e. hosts + racks)
int requiredResources = Math.max( int requiredResources = Math.max(uniqAsks - 1, 0);
application.getAppPlacementAllocator(schedulerKey)
.getUniqueLocationAsks() - 1, 0);
// waitFactor can't be more than '1' // waitFactor can't be more than '1'
// i.e. no point skipping more than clustersize opportunities // i.e. no point skipping more than clustersize opportunities
@ -296,10 +304,16 @@ private boolean canAssign(SchedulerRequestKey schedulerKey,
if (rmContext.getScheduler().getNumClusterNodes() == 0) { if (rmContext.getScheduler().getNumClusterNodes() == 0) {
return false; return false;
} }
int uniqLocationAsks = 0;
AppPlacementAllocator appPlacementAllocator =
application.getAppPlacementAllocator(schedulerKey);
if (appPlacementAllocator != null) {
uniqLocationAsks = appPlacementAllocator.getUniqueLocationAsks();
}
// If we have only ANY requests for this schedulerKey, we should not // If we have only ANY requests for this schedulerKey, we should not
// delay its scheduling. // delay its scheduling.
if (application.getAppPlacementAllocator(schedulerKey) if (uniqLocationAsks == 1) {
.getUniqueLocationAsks() == 1) {
return true; return true;
} }
@ -313,7 +327,7 @@ private boolean canAssign(SchedulerRequestKey schedulerKey,
} else { } else {
long requiredContainers = long requiredContainers =
application.getOutstandingAsksCount(schedulerKey); application.getOutstandingAsksCount(schedulerKey);
float localityWaitFactor = getLocalityWaitFactor(schedulerKey, float localityWaitFactor = getLocalityWaitFactor(uniqLocationAsks,
rmContext.getScheduler().getNumClusterNodes()); rmContext.getScheduler().getNumClusterNodes());
// Cap the delay by the number of nodes in the cluster. // Cap the delay by the number of nodes in the cluster.
return (Math.min(rmContext.getScheduler().getNumClusterNodes(), return (Math.min(rmContext.getScheduler().getNumClusterNodes(),
@ -806,6 +820,12 @@ private ContainerAllocation allocate(Resource clusterResource,
application.getAppSchedulingInfo().getAppPlacementAllocator( application.getAppSchedulingInfo().getAppPlacementAllocator(
schedulerKey); schedulerKey);
// This could be null when #pending request decreased by another thread.
if (schedulingPS == null) {
return new ContainerAllocation(reservedContainer, null,
AllocationState.QUEUE_SKIPPED);
}
result = ContainerAllocation.PRIORITY_SKIPPED; result = ContainerAllocation.PRIORITY_SKIPPED;
Iterator<FiCaSchedulerNode> iter = schedulingPS.getPreferredNodeIterator( Iterator<FiCaSchedulerNode> iter = schedulingPS.getPreferredNodeIterator(