HADOOP-11483. HardLink.java should use the jdk7 createLink method

This commit is contained in:
Akira Ajisaka 2015-01-15 20:54:44 +09:00
parent ba5116ec8e
commit db51548f70
3 changed files with 23 additions and 383 deletions

View File

@ -385,6 +385,8 @@ Release 2.7.0 - UNRELEASED
IMPROVEMENTS
HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka)
HADOOP-11156. DelegateToFileSystem should implement
getFsStatus(final Path f). (Zhihai Xu via wang)

View File

@ -23,13 +23,16 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import com.google.common.annotations.VisibleForTesting;
import static java.nio.file.Files.createLink;
/**
* Class for creating hardlinks.
* Supports Unix/Linux, Windows via winutils , and Mac OS X.
@ -75,114 +78,30 @@ public HardLink() {
/**
* This abstract class bridges the OS-dependent implementations of the
* needed functionality for creating hardlinks and querying link counts.
* needed functionality for querying link counts.
* The particular implementation class is chosen during
* static initialization phase of the HardLink class.
* The "getter" methods construct shell command strings for various purposes.
* The "getter" methods construct shell command strings.
*/
private static abstract class HardLinkCommandGetter {
/**
* Get the command string needed to hardlink a bunch of files from
* a single source directory into a target directory. The source directory
* is not specified here, but the command will be executed using the source
* directory as the "current working directory" of the shell invocation.
*
* @param fileBaseNames - array of path-less file names, relative
* to the source directory
* @param linkDir - target directory where the hardlinks will be put
* @return - an array of Strings suitable for use as a single shell command
* @throws IOException - if any of the file or path names misbehave
*/
abstract String[] linkMult(String[] fileBaseNames, File linkDir)
throws IOException;
/**
* Get the command string needed to hardlink a single file
*/
abstract String[] linkOne(File file, File linkName) throws IOException;
/**
* Get the command string to query the hardlink count of a file
*/
abstract String[] linkCount(File file) throws IOException;
/**
* Calculate the total string length of the shell command
* resulting from execution of linkMult, plus the length of the
* source directory name (which will also be provided to the shell)
*
* @param fileDir - source directory, parent of fileBaseNames
* @param fileBaseNames - array of path-less file names, relative
* to the source directory
* @param linkDir - target directory where the hardlinks will be put
* @return - total data length (must not exceed maxAllowedCmdArgLength)
* @throws IOException
*/
abstract int getLinkMultArgLength(
File fileDir, String[] fileBaseNames, File linkDir)
throws IOException;
/**
* Get the maximum allowed string length of a shell command on this OS,
* which is just the documented minimum guaranteed supported command
* length - aprx. 32KB for Unix, and 8KB for Windows.
*/
abstract int getMaxAllowedCmdArgLength();
}
/**
* Implementation of HardLinkCommandGetter class for Unix
*/
static class HardLinkCGUnix extends HardLinkCommandGetter {
private static String[] hardLinkCommand = {"ln", null, null};
private static String[] hardLinkMultPrefix = {"ln"};
private static String[] hardLinkMultSuffix = {null};
private static class HardLinkCGUnix extends HardLinkCommandGetter {
private static String[] getLinkCountCommand = {"stat","-c%h", null};
//Unix guarantees at least 32K bytes cmd length.
//Subtract another 64b to allow for Java 'exec' overhead
private static final int maxAllowedCmdArgLength = 32*1024 - 65;
private static synchronized
void setLinkCountCmdTemplate(String[] template) {
//May update this for specific unix variants,
//after static initialization phase
getLinkCountCommand = template;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
*/
@Override
String[] linkOne(File file, File linkName)
throws IOException {
String[] buf = new String[hardLinkCommand.length];
System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
//unix wants argument order: "ln <existing> <new>"
buf[1] = FileUtil.makeShellPath(file, true);
buf[2] = FileUtil.makeShellPath(linkName, true);
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
*/
@Override
String[] linkMult(String[] fileBaseNames, File linkDir)
throws IOException {
String[] buf = new String[fileBaseNames.length
+ hardLinkMultPrefix.length
+ hardLinkMultSuffix.length];
int mark=0;
System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
hardLinkMultPrefix.length);
mark += hardLinkMultPrefix.length;
System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
mark += fileBaseNames.length;
buf[mark] = FileUtil.makeShellPath(linkDir, true);
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
*/
@ -195,169 +114,30 @@ String[] linkCount(File file)
buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
*/
@Override
int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
throws IOException{
int sum = 0;
for (String x : fileBaseNames) {
// add 1 to account for terminal null or delimiter space
sum += 1 + ((x == null) ? 0 : x.length());
}
sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
+ FileUtil.makeShellPath(linkDir, true).length();
//add the fixed overhead of the hardLinkMult prefix and suffix
sum += 3; //length("ln") + 1
return sum;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
*/
@Override
int getMaxAllowedCmdArgLength() {
return maxAllowedCmdArgLength;
}
}
/**
* Implementation of HardLinkCommandGetter class for Windows
*/
@VisibleForTesting
static class HardLinkCGWin extends HardLinkCommandGetter {
//The Windows command getter impl class and its member fields are
//package-private ("default") access instead of "private" to assist
//unit testing (sort of) on non-Win servers
static String CMD_EXE = "cmd.exe";
static String[] hardLinkCommand = {
Shell.WINUTILS,"hardlink","create", null, null};
static String[] hardLinkMultPrefix = {
CMD_EXE, "/q", "/c", "for", "%f", "in", "("};
static String hardLinkMultDir = "\\%f";
static String[] hardLinkMultSuffix = {
")", "do", Shell.WINUTILS, "hardlink", "create", null,
"%f"};
static String[] getLinkCountCommand = {
Shell.WINUTILS, "hardlink", "stat", null};
//Windows guarantees only 8K - 1 bytes cmd length.
//Subtract another 64b to allow for Java 'exec' overhead
static final int maxAllowedCmdArgLength = 8*1024 - 65;
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
*/
@Override
String[] linkOne(File file, File linkName)
throws IOException {
String[] buf = new String[hardLinkCommand.length];
System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
//windows wants argument order: "create <new> <existing>"
buf[4] = file.getCanonicalPath();
buf[3] = linkName.getCanonicalPath();
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
*/
@Override
String[] linkMult(String[] fileBaseNames, File linkDir)
throws IOException {
String[] buf = new String[fileBaseNames.length
+ hardLinkMultPrefix.length
+ hardLinkMultSuffix.length];
String td = linkDir.getCanonicalPath() + hardLinkMultDir;
int mark=0;
System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
hardLinkMultPrefix.length);
mark += hardLinkMultPrefix.length;
System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
mark += fileBaseNames.length;
System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
hardLinkMultSuffix.length);
mark += hardLinkMultSuffix.length;
buf[mark - 2] = td;
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
*/
@Override
String[] linkCount(File file)
throws IOException {
String[] linkCount(File file) throws IOException {
String[] buf = new String[getLinkCountCommand.length];
System.arraycopy(getLinkCountCommand, 0, buf, 0,
getLinkCountCommand.length);
buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
return buf;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
*/
@Override
int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
throws IOException {
int sum = 0;
for (String x : fileBaseNames) {
// add 1 to account for terminal null or delimiter space
sum += 1 + ((x == null) ? 0 : x.length());
}
sum += 2 + fileDir.getCanonicalPath().length() +
linkDir.getCanonicalPath().length();
//add the fixed overhead of the hardLinkMult command
//(prefix, suffix, and Dir suffix)
sum += (CMD_EXE + " /q /c for %f in ( ) do "
+ Shell.WINUTILS + " hardlink create \\%f %f").length();
return sum;
}
/*
* @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
*/
@Override
int getMaxAllowedCmdArgLength() {
return maxAllowedCmdArgLength;
}
}
/**
* Calculate the nominal length of all contributors to the total
* commandstring length, including fixed overhead of the OS-dependent
* command. It's protected rather than private, to assist unit testing,
* but real clients are not expected to need it -- see the way
* createHardLinkMult() uses it internally so the user doesn't need to worry
* about it.
*
* @param fileDir - source directory, parent of fileBaseNames
* @param fileBaseNames - array of path-less file names, relative
* to the source directory
* @param linkDir - target directory where the hardlinks will be put
* @return - total data length (must not exceed maxAllowedCmdArgLength)
* @throws IOException
*/
protected static int getLinkMultArgLength(
File fileDir, String[] fileBaseNames, File linkDir)
throws IOException {
return getHardLinkCommand.getLinkMultArgLength(fileDir,
fileBaseNames, linkDir);
}
/**
* Return this private value for use by unit tests.
* Shell commands are not allowed to have a total string length
* exceeding this size.
*/
protected static int getMaxAllowedCmdArgLength() {
return getHardLinkCommand.getMaxAllowedCmdArgLength();
}
/*
* ****************************************************
* Complexity is above. User-visible functionality is below
@ -370,7 +150,7 @@ protected static int getMaxAllowedCmdArgLength() {
* @param linkName - desired target link file
*/
public static void createHardLink(File file, File linkName)
throws IOException {
throws IOException {
if (file == null) {
throw new IOException(
"invalid arguments to createHardLink: source file is null");
@ -379,17 +159,7 @@ public static void createHardLink(File file, File linkName)
throw new IOException(
"invalid arguments to createHardLink: link name is null");
}
// construct and execute shell command
String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand);
try {
shexec.execute();
} catch (ExitCodeException e) {
throw new IOException("Failed to execute command " +
Arrays.toString(hardLinkCommand) +
"; command output: \"" + shexec.getOutput() + "\"" +
"; WrappedException: \"" + e.getMessage() + "\"");
}
createLink(linkName.toPath(), file.toPath());
}
/**
@ -398,30 +168,10 @@ public static void createHardLink(File file, File linkName)
* @param parentDir - directory containing source files
* @param fileBaseNames - list of path-less file names, as returned by
* parentDir.list()
* @param linkDir - where the hardlinks should be put. It must already exist.
*
* If the list of files is too long (overflows maxAllowedCmdArgLength),
* we will automatically split it into multiple invocations of the
* underlying method.
* @param linkDir - where the hardlinks should be put. It must already exist.
*/
public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
File linkDir) throws IOException {
//This is the public method all non-test clients are expected to use.
//Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
createHardLinkMult(parentDir, fileBaseNames, linkDir,
getHardLinkCommand.getMaxAllowedCmdArgLength());
}
/*
* Implements {@link createHardLinkMult} with added variable "maxLength",
* to ease unit testing of the auto-splitting feature for long lists.
* Likewise why it returns "callCount", the number of sub-arrays that
* the file list had to be split into.
* Non-test clients are expected to call the public method instead.
*/
protected static int createHardLinkMult(File parentDir,
String[] fileBaseNames, File linkDir, int maxLength)
throws IOException {
if (parentDir == null) {
throw new IOException(
"invalid arguments to createHardLinkMult: parent directory is null");
@ -435,40 +185,13 @@ protected static int createHardLinkMult(File parentDir,
"invalid arguments to createHardLinkMult: "
+ "filename list can be empty but not null");
}
if (fileBaseNames.length == 0) {
//the OS cmds can't handle empty list of filenames,
//but it's legal, so just return.
return 0;
}
if (!linkDir.exists()) {
throw new FileNotFoundException(linkDir + " not found.");
}
//if the list is too long, split into multiple invocations
int callCount = 0;
if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
&& fileBaseNames.length > 1) {
String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
fileBaseNames.length);
callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
return callCount;
} else {
callCount = 1;
for (String name : fileBaseNames) {
createLink(linkDir.toPath().resolve(name),
parentDir.toPath().resolve(name));
}
// construct and execute shell command
String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
linkDir);
ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand,
parentDir, null, 0L);
try {
shexec.execute();
} catch (ExitCodeException e) {
throw new IOException(shexec.getOutput() + e.getMessage());
}
return callCount;
}
/**

View File

@ -295,91 +295,11 @@ public void testCreateHardLinkMultEmptyList() throws IOException {
String[] emptyList = {};
//test the case of empty file list
int callCount = createHardLinkMult(src, emptyList, tgt_mult,
getMaxAllowedCmdArgLength());
//check no exec calls were made
assertEquals(0, callCount);
createHardLinkMult(src, emptyList, tgt_mult);
//check nothing changed in the directory tree
validateSetup();
}
/**
* Test createHardLinkMult(), again, this time with the "too long list"
* case where the total size of the command line arguments exceed the
* allowed maximum. In this case, the list should be automatically
* broken up into chunks, each chunk no larger than the max allowed.
*
* We use an extended version of the method call, specifying the
* size limit explicitly, to simulate the "too long" list with a
* relatively short list.
*/
@Test
public void testCreateHardLinkMultOversizeAndEmpty() throws IOException {
// prep long filenames - each name takes 10 chars in the arg list
// (9 actual chars plus terminal null or delimeter blank)
String name1 = "x11111111";
String name2 = "x22222222";
String name3 = "x33333333";
File x1_long = new File(src, name1);
File x2_long = new File(src, name2);
File x3_long = new File(src, name3);
//set up source files with long file names
x1.renameTo(x1_long);
x2.renameTo(x2_long);
x3.renameTo(x3_long);
//validate setup
assertTrue(x1_long.exists());
assertTrue(x2_long.exists());
assertTrue(x3_long.exists());
assertFalse(x1.exists());
assertFalse(x2.exists());
assertFalse(x3.exists());
//prep appropriate length information to construct test case for
//oversize filename list
int callCount;
String[] emptyList = {};
String[] fileNames = src.list();
//get fixed size of arg list without any filenames
int overhead = getLinkMultArgLength(src, emptyList, tgt_mult);
//select a maxLength that is slightly too short to hold 3 filenames
int maxLength = overhead + (int)(2.5 * (float)(1 + name1.length()));
//now test list of three filenames when there is room for only 2.5
callCount = createHardLinkMult(src, fileNames, tgt_mult, maxLength);
//check the request was completed in exactly two "chunks"
assertEquals(2, callCount);
String[] tgt_multNames = tgt_mult.list();
//sort directory listings before comparsion
Arrays.sort(fileNames);
Arrays.sort(tgt_multNames);
//and check the results were as expected in the dir tree
assertArrayEquals(fileNames, tgt_multNames);
//Test the case where maxlength is too small even for one filename.
//It should go ahead and try the single files.
//Clear the test dir tree
FileUtil.fullyDelete(tgt_mult);
assertFalse(tgt_mult.exists());
tgt_mult.mkdirs();
assertTrue(tgt_mult.exists() && tgt_mult.list().length == 0);
//set a limit size much smaller than a single filename
maxLength = overhead + (int)(0.5 * (float)(1 + name1.length()));
//attempt the method call
callCount = createHardLinkMult(src, fileNames, tgt_mult,
maxLength);
//should go ahead with each of the three single file names
assertEquals(3, callCount);
tgt_multNames = tgt_mult.list();
//sort directory listings before comparsion
Arrays.sort(fileNames);
Arrays.sort(tgt_multNames);
//and check the results were as expected in the dir tree
assertArrayEquals(fileNames, tgt_multNames);
}
/*
* Assume that this test won't usually be run on a Windows box.
* This test case allows testing of the correct syntax of the Windows
@ -392,18 +312,13 @@ public void testCreateHardLinkMultOversizeAndEmpty() throws IOException {
*/
@Test
public void testWindowsSyntax() {
class win extends HardLinkCGWin {};
class win extends HardLinkCGWin {}
//basic checks on array lengths
assertEquals(5, win.hardLinkCommand.length);
assertEquals(7, win.hardLinkMultPrefix.length);
assertEquals(7, win.hardLinkMultSuffix.length);
assertEquals(4, win.getLinkCountCommand.length);
assertTrue(win.hardLinkMultPrefix[4].equals("%f"));
//make sure "%f" was not munged
assertEquals(2, ("%f").length());
assertTrue(win.hardLinkMultDir.equals("\\%f"));
//make sure "\\%f" was munged correctly
assertEquals(3, ("\\%f").length());
assertTrue(win.getLinkCountCommand[1].equals("hardlink"));