From d42806160eb95594f08f38bb753cf0306a191a38 Mon Sep 17 00:00:00 2001
From: Eric Yang
Date: Thu, 16 Aug 2018 18:41:58 -0400
Subject: [PATCH] YARN-8667. Cleanup symlinks when container restarted by NM.
 Contributed by Chandni Singh

---
 .../server/nodemanager/ContainerExecutor.java  | 62 ++++++++++++++-----
 .../launcher/ContainerLaunch.java              |  7 +++
 .../nodemanager/TestContainerExecutor.java     | 31 +++++++++-
 3 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 9b604cede5..ba272e2497 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -27,6 +27,7 @@
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.LinkedHashSet;
 import java.util.Map;
@@ -35,6 +36,7 @@
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -417,20 +419,9 @@ public void writeLaunchEnv(OutputStream out, Map<String, String> environment,
     if (resources != null) {
       sb.echo("Setting up job resources");
-      for (Map.Entry<Path, List<String>> resourceEntry :
-          resources.entrySet()) {
-        for (String linkName : resourceEntry.getValue()) {
-          if (new Path(linkName).getName().equals(WILDCARD)) {
-            // If this is a wildcarded path, link to everything in the
-            // directory from the working directory
-            for (File wildLink : readDirAsUser(user, resourceEntry.getKey())) {
-              sb.symlink(new Path(wildLink.toString()),
-                  new Path(wildLink.getName()));
-            }
-          } else {
-            sb.symlink(resourceEntry.getKey(), new Path(linkName));
-          }
-        }
+      Map<Path, Path> symLinks = resolveSymLinks(resources, user);
+      for (Map.Entry<Path, Path> symLink : symLinks.entrySet()) {
+        sb.symlink(symLink.getKey(), symLink.getValue());
       }
     }
@@ -791,6 +782,28 @@ public void resumeContainer(Container container) {
     throw new UnsupportedOperationException();
   }
 
+  /**
+   * Perform any cleanup before the next launch of the container.
+   * @param container container
+   */
+  public void cleanupBeforeRelaunch(Container container)
+      throws IOException, InterruptedException {
+    if (container.getLocalizedResources() != null) {
+
+      Map<Path, Path> symLinks = resolveSymLinks(
+          container.getLocalizedResources(), container.getUser());
+
+      for (Map.Entry<Path, Path> symLink : symLinks.entrySet()) {
+        LOG.debug("{} deleting {}", container.getContainerId(),
+            symLink.getValue());
+        deleteAsUser(new DeletionAsUserContext.Builder()
+            .setUser(container.getUser())
+            .setSubDir(symLink.getValue())
+            .build());
+      }
+    }
+  }
+
   /**
    * Get the process-identifier for the container.
    *
@@ -870,4 +883,25 @@ public void run() {
       }
     }
   }
+
+  private Map<Path, Path> resolveSymLinks(Map<Path,
+      List<String>> resources, String user) {
+    Map<Path, Path> symLinks = new HashMap<>();
+    for (Map.Entry<Path, List<String>> resourceEntry :
+        resources.entrySet()) {
+      for (String linkName : resourceEntry.getValue()) {
+        if (new Path(linkName).getName().equals(WILDCARD)) {
+          // If this is a wildcarded path, link to everything in the
+          // directory from the working directory
+          for (File wildLink : readDirAsUser(user, resourceEntry.getKey())) {
+            symLinks.put(new Path(wildLink.toString()),
+                new Path(wildLink.getName()));
+          }
+        } else {
+          symLinks.put(resourceEntry.getKey(), new Path(linkName));
+        }
+      }
+    }
+    return symLinks;
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index 21952323d0..2aca5f8f2c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -1868,6 +1868,13 @@ protected void cleanupContainerFiles(Path containerWorkDir) {
     deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
     // delete TokensPath
     deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
+
+    // delete symlinks because launch script will create symlinks again
+    try {
+      exec.cleanupBeforeRelaunch(container);
+    } catch (IOException | InterruptedException e) {
+      LOG.warn("{} exec failed to cleanup", container.getContainerId(), e);
+    }
   }
 
   private void deleteAsUser(Path path) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
index 3bcdc8751e..2890bb5ead 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
@@ -18,9 +18,17 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
+import com.google.common.collect.Lists;
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -28,13 +36,13 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
 import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
-import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
 import org.junit.Assert;
 import org.junit.Test;
 
 import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows;
 import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 @SuppressWarnings("deprecation")
 public class TestContainerExecutor {
@@ -168,4 +176,25 @@ public void testReapContainer() throws Exception {
     builder.setContainer(container).setUser("foo");
     assertTrue(containerExecutor.reapContainer(builder.build()));
   }
+
+  @Test
+  public void testCleanupBeforeLaunch() throws Exception {
+    Container container = mock(Container.class);
+    java.nio.file.Path linkName = Paths.get("target/linkName");
+    java.nio.file.Path target = Paths.get("target");
+    //deletes the link if it already exists because of previous test failures
+    FileUtils.deleteQuietly(linkName.toFile());
+    Files.createSymbolicLink(linkName.toAbsolutePath(),
+        target.toAbsolutePath());
+
+    Map<Path, List<String>> localResources = new HashMap<>();
+    localResources.put(new Path(target.toFile().getAbsolutePath()),
+        Lists.newArrayList(linkName.toFile().getAbsolutePath()));
+
+    when(container.getLocalizedResources())
+        .thenReturn(localResources);
+    when(container.getUser()).thenReturn(System.getProperty("user.name"));
+    containerExecutor.cleanupBeforeRelaunch(container);
+    Assert.assertTrue(!Files.exists(linkName));
+  }
 }
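
Note for reviewers (not part of the patch): below is a minimal, self-contained sketch of the relaunch cleanup this change introduces. It mirrors the intent of resolveSymLinks() and cleanupBeforeRelaunch() using plain java.nio.file calls instead of ContainerExecutor's readDirAsUser/deleteAsUser, and all class names and paths in it are made up for the example.

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RelaunchSymlinkCleanupSketch {

  // Stand-in for ContainerExecutor.WILDCARD; a trailing "*" marks a wildcarded resource.
  private static final String WILDCARD = "*";

  // Same idea as resolveSymLinks(): expand a wildcard entry to one link per file
  // in the resource directory, otherwise map the resource to the requested link name.
  static Map<Path, Path> resolveSymLinks(Map<Path, List<String>> resources) {
    Map<Path, Path> symLinks = new HashMap<>();
    for (Map.Entry<Path, List<String>> entry : resources.entrySet()) {
      for (String linkName : entry.getValue()) {
        if (Paths.get(linkName).getFileName().toString().equals(WILDCARD)) {
          File[] children = entry.getKey().toFile().listFiles();
          if (children != null) {
            for (File child : children) {
              symLinks.put(child.toPath(), Paths.get(child.getName()));
            }
          }
        } else {
          symLinks.put(entry.getKey(), Paths.get(linkName));
        }
      }
    }
    return symLinks;
  }

  // Same idea as cleanupBeforeRelaunch(): delete every resolved link in the work dir
  // so the relaunch script can recreate the symlinks without failing on existing files.
  static void cleanupBeforeRelaunch(Path workDir, Map<Path, List<String>> resources)
      throws IOException {
    for (Path link : resolveSymLinks(resources).values()) {
      Files.deleteIfExists(workDir.resolve(link));
    }
  }

  public static void main(String[] args) throws IOException {
    Path workDir = Files.createTempDirectory("container-workdir");
    Path resource = Files.createTempFile("resource", ".jar");
    Map<Path, List<String>> resources = new HashMap<>();
    resources.put(resource, Arrays.asList("app.jar"));

    // First "launch": the launch script would create this link.
    Files.createSymbolicLink(workDir.resolve("app.jar"), resource);

    // Before the relaunch, remove the link; the next launch script recreates it.
    cleanupBeforeRelaunch(workDir, resources);
    System.out.println("link still present: "
        + Files.exists(workDir.resolve("app.jar"), LinkOption.NOFOLLOW_LINKS));
  }
}

Only the resolved symlinks are deleted, not the rest of the container working directory, because (per the ContainerLaunch comment above) the launch script recreates the symlinks on the next start while the remaining files in the work dir are kept across the relaunch.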