From 855d52927b6115e2cfbd97a94d6c1a3ddf0e94bb Mon Sep 17 00:00:00 2001 From: Junping Du Date: Sun, 15 Nov 2015 04:43:57 -0800 Subject: [PATCH] YARN-4354. Public resource localization fails with NPE. Contributed by Jason Lowe. --- hadoop-yarn-project/CHANGES.txt | 3 + .../localizer/LocalResourcesTrackerImpl.java | 10 +++- .../TestLocalResourcesTrackerImpl.java | 56 ++++++++++++++++++- .../TestResourceLocalizationService.java | 4 +- 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 2f4311e4c7..747c729012 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1181,6 +1181,9 @@ Release 2.7.2 - UNRELEASED YARN-4320. TestJobHistoryEventHandler fails as AHS in MiniYarnCluster no longer binds to default port 8188. (Varun Saxena via ozawa) + YARN-4354. Public resource localization fails with NPE. (Jason Lowe via + junping_du) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java index fb8f767938..38fffe6c6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java @@ -185,14 +185,22 @@ public synchronized void handle(ResourceEvent event) { break; } + if (rsrc == null) { + LOG.warn("Received " + event.getType() + " event for request " + req + + " but localized resource is missing"); + return; + } rsrc.handle(event); // Remove the resource if its downloading and its reference count has // become 0 after RELEASE. This maybe because a container was killed while // localizing and no other container is referring to the resource. + // NOTE: This should NOT be done for public resources since the + // download is not associated with a container-specific localizer. if (event.getType() == ResourceEventType.RELEASE) { if (rsrc.getState() == ResourceState.DOWNLOADING && - rsrc.getRefCount() <= 0) { + rsrc.getRefCount() <= 0 && + rsrc.getRequest().getVisibility() != LocalResourceVisibility.PUBLIC) { removeResource(req); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java index 4eb86758db..21ceaa4f98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java @@ -141,12 +141,12 @@ public void test() { tracker.handle(rel21Event); dispatcher.await(); - verifyTrackedResourceCount(tracker, 1); + verifyTrackedResourceCount(tracker, 2); // Verify resource with non zero ref count is not removed. Assert.assertEquals(2, lr1.getRefCount()); Assert.assertFalse(tracker.remove(lr1, mockDelService)); - verifyTrackedResourceCount(tracker, 1); + verifyTrackedResourceCount(tracker, 2); // Localize resource1 ResourceLocalizedEvent rle = @@ -161,7 +161,7 @@ public void test() { // Verify resources in state LOCALIZED with ref-count=0 is removed. Assert.assertTrue(tracker.remove(lr1, mockDelService)); - verifyTrackedResourceCount(tracker, 0); + verifyTrackedResourceCount(tracker, 1); } finally { if (dispatcher != null) { dispatcher.stop(); @@ -899,6 +899,56 @@ public void testResourcePresentInGoodDir() throws IOException { } } + @Test + @SuppressWarnings("unchecked") + public void testReleaseWhileDownloading() throws Exception { + String user = "testuser"; + DrainDispatcher dispatcher = null; + try { + Configuration conf = new Configuration(); + dispatcher = createDispatcher(conf); + EventHandler localizerEventHandler = + mock(EventHandler.class); + EventHandler containerEventHandler = + mock(EventHandler.class); + dispatcher.register(LocalizerEventType.class, localizerEventHandler); + dispatcher.register(ContainerEventType.class, containerEventHandler); + + ContainerId cId = BuilderUtils.newContainerId(1, 1, 1, 1); + LocalizerContext lc = new LocalizerContext(user, cId, null); + + LocalResourceRequest req = + createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.PUBLIC); + LocalizedResource lr = createLocalizedResource(req, dispatcher); + ConcurrentMap localrsrc = + new ConcurrentHashMap(); + localrsrc.put(req, lr); + LocalResourcesTracker tracker = + new LocalResourcesTrackerImpl(user, null, dispatcher, localrsrc, + false, conf, new NMNullStateStoreService(), null); + + // request the resource + ResourceEvent reqEvent = + new ResourceRequestEvent(req, LocalResourceVisibility.PUBLIC, lc); + tracker.handle(reqEvent); + + // release the resource + ResourceEvent relEvent = new ResourceReleaseEvent(req, cId); + tracker.handle(relEvent); + + // download completing after release + ResourceLocalizedEvent rle = + new ResourceLocalizedEvent(req, new Path("file:///tmp/r1"), 1); + tracker.handle(rle); + + dispatcher.await(); + } finally { + if (dispatcher != null) { + dispatcher.stop(); + } + } + } + private boolean createdummylocalizefile(Path path) { boolean ret = false; File file = new File(path.toUri().getRawPath().toString()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java index f9e4188c5b..c14ec7f45d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java @@ -487,8 +487,8 @@ public void testResourceRelease() throws Exception { Assert.assertEquals("Incorrect reference count", 0, lr.getRefCount()); pubRsrcs.remove(lr.getRequest()); } - Assert.assertEquals(2, pubRsrcs.size()); - Assert.assertEquals(0, pubRsrcCount); + Assert.assertEquals(0, pubRsrcs.size()); + Assert.assertEquals(2, pubRsrcCount); appRsrcCount = 0; for (LocalizedResource lr : appTracker) {