MAPREDUCE-7030. Uploader tool should ignore symlinks to the same directory (miklos.szegedi@cloudera.com via rkanter)

This commit is contained in:
Robert Kanter 2018-01-12 14:18:01 -08:00
parent c05b84703b
commit e404650f48
2 changed files with 101 additions and 2 deletions

View File

@ -40,6 +40,9 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.NotLinkException;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@ -71,6 +74,7 @@ public class FrameworkUploader implements Runnable {
String target = null;
@VisibleForTesting
short replication = 10;
private boolean ignoreSymlink = false;
@VisibleForTesting
Set<String> filteredInputFiles = new HashSet<>();
@ -79,8 +83,7 @@ public class FrameworkUploader implements Runnable {
@VisibleForTesting
List<Pattern> blacklistedFiles = new LinkedList<>();
@VisibleForTesting
OutputStream targetStream = null;
private OutputStream targetStream = null;
private String alias = null;
private void printHelp(Options options) {
@ -284,6 +287,9 @@ private void addJar(File jar) throws UploaderException{
break;
}
}
if (ignoreSymlink && !excluded) {
excluded = checkSymlink(jar);
}
if (found && !excluded) {
LOG.info("Whitelisted " + jar.getAbsolutePath());
if (!filteredInputFiles.add(jar.getAbsolutePath())) {
@ -299,6 +305,40 @@ private void addJar(File jar) throws UploaderException{
}
}
/**
* Check if the file is a symlink to the same directory.
* @param jar The file to check
* @return true, to ignore the directory
*/
@VisibleForTesting
boolean checkSymlink(File jar) {
if (Files.isSymbolicLink(jar.toPath())) {
try {
java.nio.file.Path link = Files.readSymbolicLink(jar.toPath());
java.nio.file.Path jarPath = Paths.get(jar.getAbsolutePath());
String linkString = link.toString();
java.nio.file.Path jarParent = jarPath.getParent();
java.nio.file.Path linkPath =
jarParent == null ? null : jarParent.resolve(linkString);
java.nio.file.Path linkPathParent =
linkPath == null ? null : linkPath.getParent();
java.nio.file.Path normalizedLinkPath =
linkPathParent == null ? null : linkPathParent.normalize();
if (normalizedLinkPath != null && jarParent.equals(
normalizedLinkPath)) {
LOG.info(String.format("Ignoring same directory link %s to %s",
jarPath.toString(), link.toString()));
return true;
}
} catch (NotLinkException ex) {
LOG.debug("Not a link", jar);
} catch (IOException ex) {
LOG.warn("Cannot read symbolic link on", jar);
}
}
return false;
}
private void validateTargetPath() throws UploaderException {
if (!target.startsWith("hdfs:/") &&
!target.startsWith("file:/")) {
@ -340,6 +380,9 @@ boolean parseArguments(String[] args) throws IOException {
.withDescription(
"Desired replication count")
.hasArg().create("replication"));
opts.addOption(OptionBuilder
.withDescription("Ignore symlinks into the same directory")
.create("nosymlink"));
GenericOptionsParser parser = new GenericOptionsParser(opts, args);
if (parser.getCommandLine().hasOption("help") ||
parser.getCommandLine().hasOption("h")) {
@ -354,6 +397,9 @@ boolean parseArguments(String[] args) throws IOException {
"blacklist", DefaultJars.DEFAULT_EXCLUDED_MR_JARS);
replication = Short.parseShort(parser.getCommandLine().getOptionValue(
"replication", "10"));
if (parser.getCommandLine().hasOption("nosymlink")) {
ignoreSymlink = true;
}
String fs = parser.getCommandLine()
.getOptionValue("fs", null);
if (fs == null) {

View File

@ -18,10 +18,13 @@
package org.apache.hadoop.mapred.uploader;
import com.google.common.collect.Lists;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
@ -32,6 +35,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@ -321,4 +326,52 @@ public void testRecursiveEnvironmentReplacement()
}
/**
* Test native IO.
*/
@Test
public void testNativeIO() throws IOException {
FrameworkUploader uploader = new FrameworkUploader();
File parent = new File(testDir);
try {
// Create a parent directory
parent.deleteOnExit();
Assert.assertTrue(parent.mkdirs());
// Create a target file
File targetFile = new File(parent, "a.txt");
try(FileOutputStream os = new FileOutputStream(targetFile)) {
IOUtils.writeLines(Lists.newArrayList("a", "b"), null, os);
}
Assert.assertFalse(uploader.checkSymlink(targetFile));
// Create a symlink to the target
File symlinkToTarget = new File(parent, "symlinkToTarget.txt");
try {
Files.createSymbolicLink(
Paths.get(symlinkToTarget.getAbsolutePath()),
Paths.get(targetFile.getAbsolutePath()));
} catch (UnsupportedOperationException e) {
// Symlinks are not supported, so ignore the test
Assume.assumeTrue(false);
}
Assert.assertTrue(uploader.checkSymlink(symlinkToTarget));
// Create a symlink outside the current directory
File symlinkOutside = new File(parent, "symlinkToParent.txt");
try {
Files.createSymbolicLink(
Paths.get(symlinkOutside.getAbsolutePath()),
Paths.get(parent.getAbsolutePath()));
} catch (UnsupportedOperationException e) {
// Symlinks are not supported, so ignore the test
Assume.assumeTrue(false);
}
Assert.assertFalse(uploader.checkSymlink(symlinkOutside));
} finally {
FileUtils.deleteDirectory(parent);
}
}
}