From 84dfae2f8a1c787380c65bf8de59bfd2d65901e3 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Wed, 14 May 2014 06:41:20 +0000 Subject: [PATCH] YARN-1986. In Fifo Scheduler, node heartbeat in between creating app and attempt causes NPE (Hong Zhiguo via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1594476 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../scheduler/fifo/FifoScheduler.java | 12 ++++++-- .../resourcemanager/TestFifoScheduler.java | 29 ++++++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6b9d488daf..d82cd482a1 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -222,6 +222,9 @@ Release 2.4.1 - UNRELEASED YARN-1957. Consider the max capacity of the queue when computing the ideal capacity for preemption. (Carlo Curino via cdouglas) + YARN-1986. In Fifo Scheduler, node heartbeat in between creating app and + attempt causes NPE (Hong Zhiguo via Sandy Ryza) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 82000e1e67..21fcdecf4f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -360,7 +360,8 @@ private FiCaSchedulerNode getNode(NodeId nodeId) { return nodes.get(nodeId); } - private synchronized void addApplication(ApplicationId applicationId, + @VisibleForTesting + public synchronized void addApplication(ApplicationId applicationId, String queue, String user) { SchedulerApplication application = new SchedulerApplication(DEFAULT_QUEUE, user); @@ -372,7 +373,8 @@ private synchronized void addApplication(ApplicationId applicationId, .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); } - private synchronized void + @VisibleForTesting + public synchronized void addApplicationAttempt(ApplicationAttemptId appAttemptId, boolean transferStateFromPreviousAttempt) { SchedulerApplication application = @@ -458,6 +460,9 @@ private void assignContainers(FiCaSchedulerNode node) { .entrySet()) { FiCaSchedulerApp application = (FiCaSchedulerApp) e.getValue().getCurrentAppAttempt(); + if (application == null) { + continue; + } LOG.debug("pre-assignContainers"); application.showRequests(); synchronized (application) { @@ -497,6 +502,9 @@ private void assignContainers(FiCaSchedulerNode node) { for (SchedulerApplication application : applications.values()) { FiCaSchedulerApp attempt = (FiCaSchedulerApp) application.getCurrentAppAttempt(); + if (attempt == null) { + continue; + } attempt.setHeadroom(Resources.subtract(clusterResource, usedResource)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index 4ce6fba6e2..fcd5041e42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -66,7 +67,7 @@ public class TestFifoScheduler { private final int GB = 1024; private static YarnConfiguration conf; - + @BeforeClass public static void setup() { conf = new YarnConfiguration(); @@ -213,6 +214,32 @@ public void test() throws Exception { rm.stop(); } + @Test + public void testNodeUpdateBeforeAppAttemptInit() throws Exception { + FifoScheduler scheduler = new FifoScheduler(); + MockRM rm = new MockRM(conf); + scheduler.reinitialize(conf, rm.getRMContext()); + + RMNode node = MockNodes.newNodeInfo(1, + Resources.createResource(1024, 4), 1, "127.0.0.1"); + scheduler.handle(new NodeAddedSchedulerEvent(node)); + + ApplicationId appId = ApplicationId.newInstance(0, 1); + scheduler.addApplication(appId, "queue1", "user1"); + + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); + try { + scheduler.handle(updateEvent); + } catch (NullPointerException e) { + Assert.fail(); + } + + ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 1); + scheduler.addApplicationAttempt(attId, false); + + rm.stop(); + } + private void testMinimumAllocation(YarnConfiguration conf, int testAlloc) throws Exception { MockRM rm = new MockRM(conf);