From e404650f489727d2df9a8813fddc4e0d682fbbee Mon Sep 17 00:00:00 2001 From: Robert Kanter Date: Fri, 12 Jan 2018 14:18:01 -0800 Subject: [PATCH] MAPREDUCE-7030. Uploader tool should ignore symlinks to the same directory (miklos.szegedi@cloudera.com via rkanter) --- .../mapred/uploader/FrameworkUploader.java | 50 ++++++++++++++++- .../uploader/TestFrameworkUploader.java | 53 +++++++++++++++++++ 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java index a374262e82..899689dc09 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/main/java/org/apache/hadoop/mapred/uploader/FrameworkUploader.java @@ -40,6 +40,9 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.NotLinkException; +import java.nio.file.Paths; import java.util.HashSet; import java.util.LinkedList; import java.util.List; @@ -71,6 +74,7 @@ public class FrameworkUploader implements Runnable { String target = null; @VisibleForTesting short replication = 10; + private boolean ignoreSymlink = false; @VisibleForTesting Set filteredInputFiles = new HashSet<>(); @@ -79,8 +83,7 @@ public class FrameworkUploader implements Runnable { @VisibleForTesting List blacklistedFiles = new LinkedList<>(); - @VisibleForTesting - OutputStream targetStream = null; + private OutputStream targetStream = null; private String alias = null; private void printHelp(Options options) { @@ -284,6 +287,9 @@ private void addJar(File jar) throws UploaderException{ break; } } + if (ignoreSymlink && !excluded) { + excluded = checkSymlink(jar); + } if (found && !excluded) { LOG.info("Whitelisted " + jar.getAbsolutePath()); if (!filteredInputFiles.add(jar.getAbsolutePath())) { @@ -299,6 +305,40 @@ private void addJar(File jar) throws UploaderException{ } } + /** + * Check if the file is a symlink to the same directory. + * @param jar The file to check + * @return true, to ignore the directory + */ + @VisibleForTesting + boolean checkSymlink(File jar) { + if (Files.isSymbolicLink(jar.toPath())) { + try { + java.nio.file.Path link = Files.readSymbolicLink(jar.toPath()); + java.nio.file.Path jarPath = Paths.get(jar.getAbsolutePath()); + String linkString = link.toString(); + java.nio.file.Path jarParent = jarPath.getParent(); + java.nio.file.Path linkPath = + jarParent == null ? null : jarParent.resolve(linkString); + java.nio.file.Path linkPathParent = + linkPath == null ? null : linkPath.getParent(); + java.nio.file.Path normalizedLinkPath = + linkPathParent == null ? null : linkPathParent.normalize(); + if (normalizedLinkPath != null && jarParent.equals( + normalizedLinkPath)) { + LOG.info(String.format("Ignoring same directory link %s to %s", + jarPath.toString(), link.toString())); + return true; + } + } catch (NotLinkException ex) { + LOG.debug("Not a link", jar); + } catch (IOException ex) { + LOG.warn("Cannot read symbolic link on", jar); + } + } + return false; + } + private void validateTargetPath() throws UploaderException { if (!target.startsWith("hdfs:/") && !target.startsWith("file:/")) { @@ -340,6 +380,9 @@ boolean parseArguments(String[] args) throws IOException { .withDescription( "Desired replication count") .hasArg().create("replication")); + opts.addOption(OptionBuilder + .withDescription("Ignore symlinks into the same directory") + .create("nosymlink")); GenericOptionsParser parser = new GenericOptionsParser(opts, args); if (parser.getCommandLine().hasOption("help") || parser.getCommandLine().hasOption("h")) { @@ -354,6 +397,9 @@ boolean parseArguments(String[] args) throws IOException { "blacklist", DefaultJars.DEFAULT_EXCLUDED_MR_JARS); replication = Short.parseShort(parser.getCommandLine().getOptionValue( "replication", "10")); + if (parser.getCommandLine().hasOption("nosymlink")) { + ignoreSymlink = true; + } String fs = parser.getCommandLine() .getOptionValue("fs", null); if (fs == null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java index f3e4fc586d..ef64bfe929 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/src/test/java/org/apache/hadoop/mapred/uploader/TestFrameworkUploader.java @@ -18,10 +18,13 @@ package org.apache.hadoop.mapred.uploader; +import com.google.common.collect.Lists; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; @@ -32,6 +35,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -321,4 +326,52 @@ public void testRecursiveEnvironmentReplacement() } + /** + * Test native IO. + */ + @Test + public void testNativeIO() throws IOException { + FrameworkUploader uploader = new FrameworkUploader(); + File parent = new File(testDir); + try { + // Create a parent directory + parent.deleteOnExit(); + Assert.assertTrue(parent.mkdirs()); + + // Create a target file + File targetFile = new File(parent, "a.txt"); + try(FileOutputStream os = new FileOutputStream(targetFile)) { + IOUtils.writeLines(Lists.newArrayList("a", "b"), null, os); + } + Assert.assertFalse(uploader.checkSymlink(targetFile)); + + // Create a symlink to the target + File symlinkToTarget = new File(parent, "symlinkToTarget.txt"); + try { + Files.createSymbolicLink( + Paths.get(symlinkToTarget.getAbsolutePath()), + Paths.get(targetFile.getAbsolutePath())); + } catch (UnsupportedOperationException e) { + // Symlinks are not supported, so ignore the test + Assume.assumeTrue(false); + } + Assert.assertTrue(uploader.checkSymlink(symlinkToTarget)); + + // Create a symlink outside the current directory + File symlinkOutside = new File(parent, "symlinkToParent.txt"); + try { + Files.createSymbolicLink( + Paths.get(symlinkOutside.getAbsolutePath()), + Paths.get(parent.getAbsolutePath())); + } catch (UnsupportedOperationException e) { + // Symlinks are not supported, so ignore the test + Assume.assumeTrue(false); + } + Assert.assertFalse(uploader.checkSymlink(symlinkOutside)); + } finally { + FileUtils.deleteDirectory(parent); + } + + } + }