From 8447557333a901f0a42ee474daec76103d489b7e Mon Sep 17 00:00:00 2001 From: Thomas White Date: Tue, 15 Jan 2013 16:12:33 +0000 Subject: [PATCH] YARN-335. Fair scheduler doesn't check whether rack needs containers before assigning to node. Contributed by Sandy Ryza. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1433484 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../scheduler/fair/AppSchedulable.java | 19 ++++++--- .../scheduler/fair/TestFairScheduler.java | 42 +++++++++++++++++++ 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a37b8f3edc..614e960bf2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -189,6 +189,9 @@ Release 2.0.3-alpha - Unreleased YARN-330. Fix flakey test: TestNodeManagerShutdown#testKillContainersOnShutdown. (Sandy Ryza via hitesh) + + YARN-335. Fair scheduler doesn't check whether rack needs containers + before assigning to node. (Sandy Ryza via tomwhite) Release 2.0.2-alpha - 2012-09-07 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java index 376603e36f..060e90ad29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java @@ -307,20 +307,27 @@ public Resource assignContainer(FSSchedulerNode node, boolean reserved) { // (not scheduled) in order to promote better locality. synchronized (app) { for (Priority priority : prioritiesToTry) { + if (app.getTotalRequiredResources(priority) <= 0) { + continue; + } + app.addSchedulingOpportunity(priority); + + ResourceRequest rackLocalRequest = app.getResourceRequest(priority, + node.getRackName()); + ResourceRequest localRequest = app.getResourceRequest(priority, + node.getHostName()); + NodeType allowedLocality = app.getAllowedLocalityLevel(priority, scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), scheduler.getRackLocalityThreshold()); - - ResourceRequest localRequest = app.getResourceRequest(priority, - node.getHostName()); - if (localRequest != null && localRequest.getNumContainers() != 0) { + + if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 + && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer(node, app, priority, localRequest, NodeType.NODE_LOCAL, reserved); } - ResourceRequest rackLocalRequest = app.getResourceRequest(priority, - node.getRackName()); if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && (allowedLocality.equals(NodeType.RACK_LOCAL) || allowedLocality.equals(NodeType.OFF_SWITCH))) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index da53e39220..8ff7e110c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -1275,4 +1275,46 @@ public void testAclSubmitApplication() throws Exception { FSSchedulerApp app2 = scheduler.applications.get(attId2); assertNull("The application was allowed", app2); } + + @Test + public void testMultipleNodesSingleRackRequest() throws Exception { + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + RMNode node3 = MockNodes.newNodeInfo(2, Resources.createResource(1024)); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + + ApplicationAttemptId appId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); + scheduler.addApplication(appId, "queue1", "user1"); + + // 1 request with 2 nodes on the same rack. another request with 1 node on + // a different rack + List asks = new ArrayList(); + asks.add(createResourceRequest(1024, node1.getHostName(), 1, 1)); + asks.add(createResourceRequest(1024, node2.getHostName(), 1, 1)); + asks.add(createResourceRequest(1024, node3.getHostName(), 1, 1)); + asks.add(createResourceRequest(1024, node1.getRackName(), 1, 1)); + asks.add(createResourceRequest(1024, node3.getRackName(), 1, 1)); + asks.add(createResourceRequest(1024, RMNode.ANY, 1, 2)); + + scheduler.allocate(appId, asks, new ArrayList()); + + // node 1 checks in + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent1 = new NodeUpdateSchedulerEvent(node1, + new ArrayList(), new ArrayList()); + scheduler.handle(updateEvent1); + // should assign node local + assertEquals(1, scheduler.applications.get(appId).getLiveContainers().size()); + + // node 2 checks in + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2, + new ArrayList(), new ArrayList()); + scheduler.handle(updateEvent2); + // should assign rack local + assertEquals(2, scheduler.applications.get(appId).getLiveContainers().size()); + } }