diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index 73c49bb8f1..9c1666b658 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -199,6 +199,9 @@ public static void preserve(FileSystem targetFS, Path path, EnumSet attributes, boolean preserveRawXattrs) throws IOException { + // strip out those attributes we don't need any more + attributes.remove(FileAttribute.BLOCKSIZE); + attributes.remove(FileAttribute.CHECKSUMTYPE); // If not preserving anything from FileStatus, don't bother fetching it. FileStatus targetFileStatus = attributes.isEmpty() ? null : targetFS.getFileStatus(path); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java index 4acb022786..d76e227ffd 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java @@ -45,6 +45,7 @@ import com.google.common.collect.Lists; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.util.EnumSet; @@ -66,6 +67,7 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.assertj.core.api.Assertions.assertThat; @@ -191,15 +193,95 @@ public void testPreserveDefaults() throws IOException { DistCpUtils.preserve(fs, dst, srcStatus, attributes, false); - CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst)); + assertStatusEqual(fs, dst, srcStatus); + } + + private void assertStatusEqual(final FileSystem fs, + final Path dst, + final CopyListingFileStatus srcStatus) throws IOException { + FileStatus destStatus = fs.getFileStatus(dst); + CopyListingFileStatus dstStatus = new CopyListingFileStatus( + destStatus); + + String text = String.format("Source %s; dest %s: wrong ", srcStatus, + destStatus); // FileStatus.equals only compares path field, must explicitly compare all fields - Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission())); - Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner())); - Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup())); - Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime()); - Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime()); - Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication()); + assertEquals(text + "permission", + srcStatus.getPermission(), dstStatus.getPermission()); + assertEquals(text + "owner", + srcStatus.getOwner(), dstStatus.getOwner()); + assertEquals(text + "group", + srcStatus.getGroup(), dstStatus.getGroup()); + assertEquals(text + "accessTime", + srcStatus.getAccessTime(), dstStatus.getAccessTime()); + assertEquals(text + "modificationTime", + srcStatus.getModificationTime(), dstStatus.getModificationTime()); + assertEquals(text + "replication", + srcStatus.getReplication(), dstStatus.getReplication()); + } + + private void assertStatusNotEqual(final FileSystem fs, + final Path dst, + final CopyListingFileStatus srcStatus) throws IOException { + FileStatus destStatus = fs.getFileStatus(dst); + CopyListingFileStatus dstStatus = new CopyListingFileStatus( + destStatus); + + String text = String.format("Source %s; dest %s: wrong ", + srcStatus, destStatus); + // FileStatus.equals only compares path field, + // must explicitly compare all fields + assertNotEquals(text + "permission", + srcStatus.getPermission(), dstStatus.getPermission()); + assertNotEquals(text + "owner", + srcStatus.getOwner(), dstStatus.getOwner()); + assertNotEquals(text + "group", + srcStatus.getGroup(), dstStatus.getGroup()); + assertNotEquals(text + "accessTime", + srcStatus.getAccessTime(), dstStatus.getAccessTime()); + assertNotEquals(text + "modificationTime", + srcStatus.getModificationTime(), dstStatus.getModificationTime()); + assertNotEquals(text + "replication", + srcStatus.getReplication(), dstStatus.getReplication()); + } + + + @Test + public void testSkipsNeedlessAttributes() throws Exception { + FileSystem fs = FileSystem.get(config); + + // preserve replication, block size, user, group, permission, + // checksum type and timestamps + + Path src = new Path("/tmp/testSkipsNeedlessAttributes/source"); + Path dst = new Path("/tmp/testSkipsNeedlessAttributes/dest"); + + // there is no need to actually create a source file, just a file + // status of one + CopyListingFileStatus srcStatus = new CopyListingFileStatus( + new FileStatus(0, false, 1, 32, 0, src)); + + // if an attribute is needed, preserve will fail to find the file + EnumSet attrs = EnumSet.of(FileAttribute.ACL, + FileAttribute.GROUP, + FileAttribute.PERMISSION, + FileAttribute.TIMES, + FileAttribute.XATTR); + for (FileAttribute attr : attrs) { + intercept(FileNotFoundException.class, () -> + DistCpUtils.preserve(fs, dst, srcStatus, + EnumSet.of(attr), + false)); + } + + // but with the preservation flags only used + // in file creation, this does not happen + DistCpUtils.preserve(fs, dst, srcStatus, + EnumSet.of( + FileAttribute.BLOCKSIZE, + FileAttribute.CHECKSUMTYPE), + false); } @Test @@ -258,16 +340,8 @@ public void testPreserveAclsforDefaultACL() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all // fields - Assert.assertEquals("getPermission", srcStatus.getPermission(), - dstStatus.getPermission()); - Assert.assertEquals("Owner", srcStatus.getOwner(), dstStatus.getOwner()); - Assert.assertEquals("Group", srcStatus.getGroup(), dstStatus.getGroup()); - Assert.assertEquals("AccessTime", srcStatus.getAccessTime(), - dstStatus.getAccessTime()); - Assert.assertEquals("ModificationTime", srcStatus.getModificationTime(), - dstStatus.getModificationTime()); - Assert.assertEquals("Replication", srcStatus.getReplication(), - dstStatus.getReplication()); + assertStatusEqual(fs, dest, srcStatus); + Assert.assertArrayEquals(en1.toArray(), dd2.toArray()); } @@ -486,12 +560,7 @@ public void testPreserveNothingOnFile() throws IOException { CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst)); // FileStatus.equals only compares path field, must explicitly compare all fields - Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission())); - Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner())); - Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup())); - Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime()); - Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime()); - Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication()); + assertStatusNotEqual(fs, dst, srcStatus); } @Test @@ -842,13 +911,7 @@ public void testPreserveOnFileUpwardRecursion() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all fields // attributes of src -> f2 ? should be yes - CopyListingFileStatus f2Status = new CopyListingFileStatus(fs.getFileStatus(f2)); - Assert.assertTrue(srcStatus.getPermission().equals(f2Status.getPermission())); - Assert.assertTrue(srcStatus.getOwner().equals(f2Status.getOwner())); - Assert.assertTrue(srcStatus.getGroup().equals(f2Status.getGroup())); - Assert.assertTrue(srcStatus.getAccessTime() == f2Status.getAccessTime()); - Assert.assertTrue(srcStatus.getModificationTime() == f2Status.getModificationTime()); - Assert.assertTrue(srcStatus.getReplication() == f2Status.getReplication()); + assertStatusEqual(fs, f2, srcStatus); // attributes of src -> f1 ? should be no CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1)); @@ -1047,13 +1110,7 @@ public void testPreserveOnFileDownwardRecursion() throws IOException { // FileStatus.equals only compares path field, must explicitly compare all fields // attributes of src -> f0 ? should be yes - CopyListingFileStatus f0Status = new CopyListingFileStatus(fs.getFileStatus(f0)); - Assert.assertTrue(srcStatus.getPermission().equals(f0Status.getPermission())); - Assert.assertTrue(srcStatus.getOwner().equals(f0Status.getOwner())); - Assert.assertTrue(srcStatus.getGroup().equals(f0Status.getGroup())); - Assert.assertTrue(srcStatus.getAccessTime() == f0Status.getAccessTime()); - Assert.assertTrue(srcStatus.getModificationTime() == f0Status.getModificationTime()); - Assert.assertTrue(srcStatus.getReplication() == f0Status.getReplication()); + assertStatusEqual(fs, f0, srcStatus); // attributes of src -> f1 ? should be no CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));