diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5a0f6f2a49..574faa253a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -522,6 +522,9 @@ Release 2.7.1 - UNRELEASED HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to finalize upgrade. (jing9) + HDFS-8151. Always use snapshot path as source when invalid snapshot names + are used for diff based distcp. (jing9) + Release 2.7.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index 8e71b6f53d..5bf638d5b3 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -47,8 +47,8 @@ static boolean sync(DistCpOptions inputOptions, Configuration conf) List sourcePaths = inputOptions.getSourcePaths(); if (sourcePaths.size() != 1) { // we only support one source dir which must be a snapshottable directory - DistCp.LOG.warn(sourcePaths.size() + " source paths are provided"); - return false; + throw new IllegalArgumentException(sourcePaths.size() + + " source paths are provided"); } final Path sourceDir = sourcePaths.get(0); final Path targetDir = inputOptions.getTargetPath(); @@ -59,15 +59,17 @@ static boolean sync(DistCpOptions inputOptions, Configuration conf) // DistributedFileSystem. if (!(sfs instanceof DistributedFileSystem) || !(tfs instanceof DistributedFileSystem)) { - DistCp.LOG.warn("To use diff-based distcp, the FileSystems needs to" + - " be DistributedFileSystem"); - return false; + throw new IllegalArgumentException("The FileSystems needs to" + + " be DistributedFileSystem for using snapshot-diff-based distcp"); } final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs; final DistributedFileSystem targetFs= (DistributedFileSystem) tfs; // make sure targetFS has no change between from and the current states if (!checkNoChange(inputOptions, targetFs, targetDir)) { + // set the source path using the snapshot path + inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir, + inputOptions.getToSnapshot()))); return false; } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java index 75d1de5058..0a9a11fb66 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java @@ -88,24 +88,37 @@ public void tearDown() throws Exception { public void testFallback() throws Exception { // the source/target dir are not snapshottable dir Assert.assertFalse(DistCpSync.sync(options, conf)); + // make sure the source path has been updated to the snapshot path + final Path spath = new Path(source, + HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2"); + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source path in options + options.setSourcePaths(Arrays.asList(source)); // the source/target does not have the given snapshots dfs.allowSnapshot(source); dfs.allowSnapshot(target); Assert.assertFalse(DistCpSync.sync(options, conf)); + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source path in options + options.setSourcePaths(Arrays.asList(source)); dfs.createSnapshot(source, "s1"); dfs.createSnapshot(source, "s2"); dfs.createSnapshot(target, "s1"); Assert.assertTrue(DistCpSync.sync(options, conf)); + // reset source paths in options options.setSourcePaths(Arrays.asList(source)); - // changes have been made in target final Path subTarget = new Path(target, "sub"); dfs.mkdirs(subTarget); Assert.assertFalse(DistCpSync.sync(options, conf)); + // make sure the source path has been updated to the snapshot path + Assert.assertEquals(spath, options.getSourcePaths().get(0)); + // reset source paths in options + options.setSourcePaths(Arrays.asList(source)); dfs.delete(subTarget, true); Assert.assertTrue(DistCpSync.sync(options, conf)); }