diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 51187471cf..573b2de8f5 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -768,6 +768,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12358. Add -safely flag to rm to prompt when deleting many files.
     (xyao via wang)
 
+    HADOOP-12384. Add "-direct" flag option for fs copy so that user can choose
+    not to create "._COPYING_" file (J.Andreina via vinayakumarb)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
index 65e52fd3b0..2fdde3fcdf 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
@@ -61,7 +61,8 @@ abstract class CommandWithDestination extends FsCommand {
   private boolean verifyChecksum = true;
   private boolean writeChecksum = true;
   private boolean lazyPersist = false;
-
+  private boolean direct = false;
+
   /**
    * The name of the raw xattr namespace. It would be nice to use
    * XAttr.RAW.name() but we can't reference the hadoop-hdfs project.
@@ -94,7 +95,11 @@ protected void setVerifyChecksum(boolean flag) {
   protected void setWriteChecksum(boolean flag) {
     writeChecksum = flag;
   }
-
+
+  protected void setDirectWrite(boolean flag) {
+    direct = flag;
+  }
+
   /**
    * If true, the last modified time, last access time,
    * owner, group and permission information of the source
@@ -372,9 +377,11 @@ private boolean checkPathsForReservedRaw(Path src, Path target)
   }
 
   /**
-   * Copies the stream contents to a temporary file. If the copy is
+   * If direct write is disabled, copies the stream contents to a temporary
+   * file "._COPYING_". If the copy is
    * successful, the temporary file will be renamed to the real path,
    * else the temporary file will be deleted.
+   * If direct write is enabled, creation of the temporary file is skipped.
    * @param in the input stream for the copy
    * @param target where to store the contents of the stream
    * @throws IOException if copy fails
@@ -386,10 +393,12 @@ protected void copyStreamToTarget(InputStream in, PathData target)
     }
     TargetFileSystem targetFs = new TargetFileSystem(target.fs);
     try {
-      PathData tempTarget = target.suffix("._COPYING_");
+      PathData tempTarget = direct ? target : target.suffix("._COPYING_");
       targetFs.setWriteChecksum(writeChecksum);
-      targetFs.writeStreamToFile(in, tempTarget, lazyPersist);
-      targetFs.rename(tempTarget, target);
+      targetFs.writeStreamToFile(in, tempTarget, lazyPersist, direct);
+      if (!direct) {
+        targetFs.rename(tempTarget, target);
+      }
     } finally {
       targetFs.close(); // last ditch effort to ensure temp file is removed
     }
@@ -459,10 +468,11 @@ private static class TargetFileSystem extends FilterFileSystem {
     }
 
     void writeStreamToFile(InputStream in, PathData target,
-                           boolean lazyPersist) throws IOException {
+        boolean lazyPersist, boolean direct)
+        throws IOException {
       FSDataOutputStream out = null;
       try {
-        out = create(target, lazyPersist);
+        out = create(target, lazyPersist, direct);
         IOUtils.copyBytes(in, out, getConf(), true);
       } finally {
         IOUtils.closeStream(out); // just in case copyBytes didn't
@@ -470,7 +480,8 @@ void writeStreamToFile(InputStream in, PathData target,
     }
 
     // tag created files as temp files
-    FSDataOutputStream create(PathData item, boolean lazyPersist)
+    FSDataOutputStream create(PathData item, boolean lazyPersist,
+        boolean direct)
         throws IOException {
       try {
         if (lazyPersist) {
@@ -488,7 +499,9 @@ FSDataOutputStream create(PathData item, boolean lazyPersist)
           return create(item.path, true);
         }
       } finally { // might have been created but stream was interrupted
-        deleteOnExit(item.path);
+        if (!direct) {
+          deleteOnExit(item.path);
+        }
       }
     }
 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
index afd11150d9..d16d27ef54 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
@@ -133,7 +133,8 @@ protected void processPath(PathData src) throws IOException {
 
   static class Cp extends CommandWithDestination {
     public static final String NAME = "cp";
-    public static final String USAGE = "[-f] [-p | -p[topax]] <src> ... <dst>";
+    public static final String USAGE =
+        "[-f] [-p | -p[topax]] [-d] <src> ... <dst>";
     public static final String DESCRIPTION =
       "Copy files that match the file pattern <src> to a " +
       "destination. When copying multiple files, the destination " +
@@ -147,13 +148,15 @@ static class Cp extends CommandWithDestination {
       "if (1) they are supported (HDFS only) and, (2) all of the source and " +
       "target pathnames are in the /.reserved/raw hierarchy. raw namespace " +
       "xattr preservation is determined solely by the presence (or absence) " +
-      "of the /.reserved/raw prefix and not by the -p option.\n";
+      "of the /.reserved/raw prefix and not by the -p option. Passing -d " +
+      "will skip creation of temporary file(<dst>._COPYING_).\n";
 
     @Override
     protected void processOptions(LinkedList<String> args) throws IOException {
       popPreserveOption(args);
-      CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "f");
+      CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "f", "d");
       cf.parse(args);
+      setDirectWrite(cf.getOpt("d"));
       setOverwrite(cf.getOpt("f"));
       // should have a -r option
       setRecursive(true);
@@ -215,7 +218,8 @@ protected void processOptions(LinkedList<String> args)
    */
   public static class Put extends CommandWithDestination {
     public static final String NAME = "put";
-    public static final String USAGE = "[-f] [-p] [-l] <localsrc> ... <dst>";
+    public static final String USAGE =
+        "[-f] [-p] [-l] [-d] <localsrc> ... <dst>";
"; public static final String DESCRIPTION = "Copy files from the local file system " + "into fs. Copying fails if the file already " + @@ -225,15 +229,18 @@ public static class Put extends CommandWithDestination { " -f : Overwrites the destination if it already exists.\n" + " -l : Allow DataNode to lazily persist the file to disk. Forces\n" + " replication factor of 1. This flag will result in reduced\n" + - " durability. Use with care.\n"; + " durability. Use with care.\n" + + " -d : Skip creation of temporary file(._COPYING_).\n"; @Override protected void processOptions(LinkedList args) throws IOException { - CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l"); + CommandFormat cf = + new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l", "d"); cf.parse(args); setOverwrite(cf.getOpt("f")); setPreserve(cf.getOpt("p")); setLazyPersist(cf.getOpt("l")); + setDirectWrite(cf.getOpt("d")); getRemoteDestination(args); // should have a -r option setRecursive(true); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java index c0a6b20def..1d626f9803 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java @@ -485,6 +485,52 @@ public void testGetWindowsLocalPath() throws Exception { checkPath(dstPath, false); } + @Test + public void testDirectCopy() throws Exception { + Path testRoot = new Path(testRootDir, "testPutFile"); + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + + Path target_COPYING_File = new Path(testRoot, "target._COPYING_"); + Path target_File = new Path(testRoot, "target"); + Path srcFile = new Path(testRoot, new Path("srcFile")); + lfs.createNewFile(srcFile); + + // If direct write is false , then creation of "file1" ,will delete file + // (file1._COPYING_) if already exist. + checkDirectCopy(srcFile, target_File, target_COPYING_File, false); + shell.run(new String[] { "-rm", target_File.toString() }); + + // If direct write is true , then creation of "file1", will not create a + // temporary file and will not delete (file1._COPYING_) if already exist. + checkDirectCopy(srcFile, target_File, target_COPYING_File, true); + } + + private void checkDirectCopy(Path srcFile, Path target_File, + Path target_COPYING_File,boolean direct) throws Exception { + int directWriteExitCode = direct ? 0 : 1; + shell + .run(new String[] { "-copyFromLocal", srcFile.toString(), + target_COPYING_File.toString() }); + int srcFileexist = shell + .run(new String[] { "-cat", target_COPYING_File.toString() }); + assertEquals(0, srcFileexist); + + if (!direct) { + shell.run(new String[] { "-copyFromLocal", srcFile.toString(), + target_File.toString() }); + } else { + shell.run(new String[] { "-copyFromLocal", "-d", srcFile.toString(), + target_File.toString() }); + } + // cat of "target._COPYING_" will return exitcode : + // as 1(file does not exist), if direct write is false. + // as 0, if direct write is true. + srcFileexist = shell.run(new String[] { "-cat", + target_COPYING_File.toString() }); + assertEquals(directWriteExitCode, srcFileexist); + } + private void createFile(Path ... 
     for (Path path : paths) {
       FSDataOutputStream out = lfs.create(path);
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
index aa9ee76c7f..7513843997 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
@@ -336,7 +336,7 @@
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^-cp \[-f\] \[-p \| -p\[topax\]\] &lt;src&gt; \.\.\. &lt;dst&gt; :\s*</expected-output>
+          <expected-output>^-cp \[-f\] \[-p \| -p\[topax\]\] \[-d\] &lt;src&gt; \.\.\. &lt;dst&gt; :\s*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
@@ -376,7 +376,11 @@
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^\s*\(or absence\) of the \/\.reserved\/raw prefix and not by the -p option.( )*</expected-output>
+          <expected-output>^\s*\(or absence\) of the \/\.reserved\/raw prefix and not by the -p option\. Passing -d( )*</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^\s*will skip creation of temporary file\(&lt;dst&gt;\._COPYING_\)\.( )*</expected-output>
         </comparator>
       </comparators>
     </test>
@@ -472,7 +476,7 @@
         </comparator>
        <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^-put \[-f\] \[-p\] \[-l\] &lt;localsrc&gt; \.\.\. &lt;dst&gt; :( )*</expected-output>
+          <expected-output>^-put \[-f\] \[-p\] \[-l\] \[-d\] &lt;localsrc&gt; \.\.\. &lt;dst&gt; :( )*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
@@ -506,6 +510,10 @@
         <comparator>
           <type>RegexpComparator</type>
           <expected-output>^\s*durability. Use with care.( )*</expected-output>
         </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^\s*-d Skip creation of temporary file\(&lt;dst&gt;\._COPYING_\).( )*</expected-output>
+        </comparator>
       </comparators>
     </test>
@@ -519,7 +527,7 @@
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^-copyFromLocal \[-f\] \[-p\] \[-l\] &lt;localsrc&gt; \.\.\. &lt;dst&gt; :\s*</expected-output>
+          <expected-output>^-copyFromLocal \[-f\] \[-p\] \[-l\] \[-d\] &lt;localsrc&gt; \.\.\. &lt;dst&gt; :\s*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
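
For reference, a minimal usage sketch of the new -d (direct write) option, driving FsShell programmatically the same way the test above does. The class name and paths are illustrative only and are not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

public class DirectPutExample {
  public static void main(String[] args) throws Exception {
    FsShell shell = new FsShell(new Configuration());
    // With -d the destination is written directly, so no intermediate
    // "<dst>._COPYING_" file is created and no final rename is needed.
    // Without -d the shell writes to "<dst>._COPYING_" first and renames
    // it to the real destination once the copy succeeds.
    int exitCode = shell.run(
        new String[] { "-put", "-d", "/tmp/localFile", "/user/example/target" });
    System.exit(exitCode);
  }
}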