From 49969b16cdba0f251b9f8bf3d8df9906e38b5c61 Mon Sep 17 00:00:00 2001
From: Wangda Tan
Date: Tue, 26 Jul 2016 18:14:09 -0700
Subject: [PATCH] YARN-5342. Improve non-exclusive node partition resource
 allocation in Capacity Scheduler. (Sunil G via wangda)

---
 .../allocator/RegularContainerAllocator.java | 15 +++++++++++----
 .../capacity/TestApplicationPriority.java    |  2 ++
 .../TestNodeLabelContainerAllocation.java    | 13 ++++++-------
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
index 4bae5bedd6..29b37d8e36 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -695,15 +696,21 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult,
       }
 
       // Non-exclusive scheduling opportunity is different: we need reset
-      // it every time to make sure non-labeled resource request will be
+      // it when:
+      // - It allocated on the default partition
+      //
+      // This is to make sure non-labeled resource request will be
       // most likely allocated on non-labeled nodes first.
-      application.resetMissedNonPartitionedRequestSchedulingOpportunity(
-          schedulerKey);
+      if (StringUtils.equals(node.getPartition(),
+          RMNodeLabelsManager.NO_LABEL)) {
+        application
+            .resetMissedNonPartitionedRequestSchedulingOpportunity(schedulerKey);
+      }
     }
 
     return allocationResult;
   }
-  
+
   private ContainerAllocation allocate(Resource clusterResource,
       FiCaSchedulerNode node, SchedulingMode schedulingMode,
       ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
index 9bb8827e70..d862c75ea4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
@@ -280,6 +281,7 @@ public void testPriorityWithPendingApplications() throws Exception {
     // If app3 (highest priority among rest) gets active, it indicates that
     // priority is working with pendingApplications.
     rm.killApp(app1.getApplicationId());
+    rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
 
     // kick the scheduler, app3 (high among pending) gets free space
     MockAM am3 = MockRM.launchAM(app3, rm, nm1);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
index df2c9ffeff..9070577850 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
@@ -768,8 +768,6 @@ public RMNodeLabelsManager createNodeLabelManager() {
     rm1.start();
     MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); // label = y
     MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB); // label = <empty>
-    
-    ContainerId nextContainerId;
 
     // launch an app to queue b1 (label = y), AM container should be launched in nm3
     RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
@@ -777,12 +775,13 @@ public RMNodeLabelsManager createNodeLabelManager() {
 
     // request containers from am2, priority=1 asks for "" and priority=2 asks
     // for "y", "y" container should be allocated first
-    nextContainerId =
-        ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
     am1.allocate("*", 1 * GB, 1, 1, new ArrayList<ContainerId>(), "");
     am1.allocate("*", 1 * GB, 1, 2, new ArrayList<ContainerId>(), "y");
-    Assert.assertTrue(rm1.waitForState(nm1, nextContainerId,
-        RMContainerState.ALLOCATED));
+
+    // Do a node heartbeat once
+    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+    cs.handle(new NodeUpdateSchedulerEvent(
+        rm1.getRMContext().getRMNodes().get(nm1.getNodeId())));
 
     // Check pending resource for am2, priority=1 doesn't get allocated before
     // priority=2 allocated
@@ -1674,7 +1673,7 @@ public RMNodeLabelsManager createNodeLabelManager() {
     // Test case 7
     // After c allocated, d will go first because it has less used_capacity(x)
     // than c
-    doNMHeartbeat(rm, nm1.getNodeId(), 2);
+    doNMHeartbeat(rm, nm1.getNodeId(), 1);
     checkNumOfContainersInAnAppOnGivenNode(2, nm1.getNodeId(),
         cs.getApplicationAttempt(am1.getApplicationAttemptId()));
     checkNumOfContainersInAnAppOnGivenNode(3, nm1.getNodeId(),
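
The behavioral core of the RegularContainerAllocator hunk is the new guard
around the reset call: the missed-scheduling-opportunity counter for
non-partitioned requests is now cleared only when a container is allocated on
the default (empty) partition. Previously the counter was reset after every
allocation, so a container borrowed from a labeled partition under
non-exclusive mode would erase the pressure to place unlabeled requests on
unlabeled nodes first. Below is a minimal, self-contained sketch of that
counter policy; the class and method names (NonExclusiveResetSketch, miss,
allocated) are invented for illustration and are not YARN's actual APIs.

import java.util.HashMap;
import java.util.Map;

/** Toy model of the missed-opportunity counter whose reset this patch guards. */
public class NonExclusiveResetSketch {

  // Stands in for RMNodeLabelsManager.NO_LABEL, the default (empty) partition.
  static final String NO_LABEL = "";

  // Missed-scheduling-opportunity count per scheduler key (request priority).
  private final Map<Integer, Integer> missed = new HashMap<>();

  private final boolean patched; // true = post-YARN-5342 behavior

  NonExclusiveResetSketch(boolean patched) {
    this.patched = patched;
  }

  /** A non-partitioned request was skipped on a labeled node. */
  void miss(int schedulerKey) {
    missed.merge(schedulerKey, 1, Integer::sum);
  }

  /** A container for this request was allocated on nodePartition. */
  void allocated(int schedulerKey, String nodePartition) {
    // Pre-patch: reset on every allocation, even one on a labeled partition.
    // Post-patch: reset only for default-partition allocations, so borrowing
    // a labeled node does not hide the demand for unlabeled nodes.
    if (!patched || NO_LABEL.equals(nodePartition)) {
      missed.put(schedulerKey, 0);
    }
  }

  int misses(int schedulerKey) {
    return missed.getOrDefault(schedulerKey, 0);
  }

  public static void main(String[] args) {
    for (boolean patched : new boolean[] { false, true }) {
      NonExclusiveResetSketch app = new NonExclusiveResetSketch(patched);
      app.miss(1);           // skipped once on a labeled node
      app.allocated(1, "x"); // then allocated on labeled partition "x"
      // Prints misses=0 pre-patch (counter wiped by the labeled allocation)
      // and misses=1 post-patch (counter kept until a NO_LABEL allocation).
      System.out.printf("patched=%b -> misses=%d%n", patched, app.misses(1));
    }
  }
}

Keeping the counter alive across labeled-partition allocations is also
consistent with the TestNodeLabelContainerAllocation update above, where the
allocation is now expected after a single node heartbeat rather than two.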