HADOOP-16932. distcp copy calls getFileStatus() needlessly and can fail against S3 (#1936)
Contributed by Steve Loughran. This strips out all the -p preservation options which have already been processed when uploading a file before deciding whether or not to query the far end for the status of the (existing/uploaded) file to see if any other attributes need changing. This will avoid 404 caching-related issues in S3, wherein a newly created file can have a 404 entry in the S3 load balancer's cache from the probes for the file's existence prior to the upload. It partially addresses a regression caused by HADOOP-8143, "Change distcp to have -pb on by default" that causes a resurfacing of HADOOP-13145, "In DistCp, prevent unnecessary getFileStatus call when not preserving metadata"
This commit is contained in:
parent
bffb43b00e
commit
20eec95867
@ -199,6 +199,9 @@ public static void preserve(FileSystem targetFS, Path path,
|
||||
EnumSet<FileAttribute> attributes,
|
||||
boolean preserveRawXattrs) throws IOException {
|
||||
|
||||
// strip out those attributes we don't need any more
|
||||
attributes.remove(FileAttribute.BLOCKSIZE);
|
||||
attributes.remove(FileAttribute.CHECKSUMTYPE);
|
||||
// If not preserving anything from FileStatus, don't bother fetching it.
|
||||
FileStatus targetFileStatus = attributes.isEmpty() ? null :
|
||||
targetFS.getFileStatus(path);
|
||||
|
@ -45,6 +45,7 @@
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.EnumSet;
|
||||
@ -66,6 +67,7 @@
|
||||
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@ -191,15 +193,95 @@ public void testPreserveDefaults() throws IOException {
|
||||
|
||||
DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);
|
||||
|
||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));
|
||||
assertStatusEqual(fs, dst, srcStatus);
|
||||
}
|
||||
|
||||
private void assertStatusEqual(final FileSystem fs,
|
||||
final Path dst,
|
||||
final CopyListingFileStatus srcStatus) throws IOException {
|
||||
FileStatus destStatus = fs.getFileStatus(dst);
|
||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(
|
||||
destStatus);
|
||||
|
||||
String text = String.format("Source %s; dest %s: wrong ", srcStatus,
|
||||
destStatus);
|
||||
|
||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||
Assert.assertTrue(srcStatus.getPermission().equals(dstStatus.getPermission()));
|
||||
Assert.assertTrue(srcStatus.getOwner().equals(dstStatus.getOwner()));
|
||||
Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
|
||||
Assert.assertTrue(srcStatus.getAccessTime() == dstStatus.getAccessTime());
|
||||
Assert.assertTrue(srcStatus.getModificationTime() == dstStatus.getModificationTime());
|
||||
Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
|
||||
assertEquals(text + "permission",
|
||||
srcStatus.getPermission(), dstStatus.getPermission());
|
||||
assertEquals(text + "owner",
|
||||
srcStatus.getOwner(), dstStatus.getOwner());
|
||||
assertEquals(text + "group",
|
||||
srcStatus.getGroup(), dstStatus.getGroup());
|
||||
assertEquals(text + "accessTime",
|
||||
srcStatus.getAccessTime(), dstStatus.getAccessTime());
|
||||
assertEquals(text + "modificationTime",
|
||||
srcStatus.getModificationTime(), dstStatus.getModificationTime());
|
||||
assertEquals(text + "replication",
|
||||
srcStatus.getReplication(), dstStatus.getReplication());
|
||||
}
|
||||
|
||||
private void assertStatusNotEqual(final FileSystem fs,
|
||||
final Path dst,
|
||||
final CopyListingFileStatus srcStatus) throws IOException {
|
||||
FileStatus destStatus = fs.getFileStatus(dst);
|
||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(
|
||||
destStatus);
|
||||
|
||||
String text = String.format("Source %s; dest %s: wrong ",
|
||||
srcStatus, destStatus);
|
||||
// FileStatus.equals only compares path field,
|
||||
// must explicitly compare all fields
|
||||
assertNotEquals(text + "permission",
|
||||
srcStatus.getPermission(), dstStatus.getPermission());
|
||||
assertNotEquals(text + "owner",
|
||||
srcStatus.getOwner(), dstStatus.getOwner());
|
||||
assertNotEquals(text + "group",
|
||||
srcStatus.getGroup(), dstStatus.getGroup());
|
||||
assertNotEquals(text + "accessTime",
|
||||
srcStatus.getAccessTime(), dstStatus.getAccessTime());
|
||||
assertNotEquals(text + "modificationTime",
|
||||
srcStatus.getModificationTime(), dstStatus.getModificationTime());
|
||||
assertNotEquals(text + "replication",
|
||||
srcStatus.getReplication(), dstStatus.getReplication());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSkipsNeedlessAttributes() throws Exception {
|
||||
FileSystem fs = FileSystem.get(config);
|
||||
|
||||
// preserve replication, block size, user, group, permission,
|
||||
// checksum type and timestamps
|
||||
|
||||
Path src = new Path("/tmp/testSkipsNeedlessAttributes/source");
|
||||
Path dst = new Path("/tmp/testSkipsNeedlessAttributes/dest");
|
||||
|
||||
// there is no need to actually create a source file, just a file
|
||||
// status of one
|
||||
CopyListingFileStatus srcStatus = new CopyListingFileStatus(
|
||||
new FileStatus(0, false, 1, 32, 0, src));
|
||||
|
||||
// if an attribute is needed, preserve will fail to find the file
|
||||
EnumSet<FileAttribute> attrs = EnumSet.of(FileAttribute.ACL,
|
||||
FileAttribute.GROUP,
|
||||
FileAttribute.PERMISSION,
|
||||
FileAttribute.TIMES,
|
||||
FileAttribute.XATTR);
|
||||
for (FileAttribute attr : attrs) {
|
||||
intercept(FileNotFoundException.class, () ->
|
||||
DistCpUtils.preserve(fs, dst, srcStatus,
|
||||
EnumSet.of(attr),
|
||||
false));
|
||||
}
|
||||
|
||||
// but with the preservation flags only used
|
||||
// in file creation, this does not happen
|
||||
DistCpUtils.preserve(fs, dst, srcStatus,
|
||||
EnumSet.of(
|
||||
FileAttribute.BLOCKSIZE,
|
||||
FileAttribute.CHECKSUMTYPE),
|
||||
false);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -258,16 +340,8 @@ public void testPreserveAclsforDefaultACL() throws IOException {
|
||||
|
||||
// FileStatus.equals only compares path field, must explicitly compare all
|
||||
// fields
|
||||
Assert.assertEquals("getPermission", srcStatus.getPermission(),
|
||||
dstStatus.getPermission());
|
||||
Assert.assertEquals("Owner", srcStatus.getOwner(), dstStatus.getOwner());
|
||||
Assert.assertEquals("Group", srcStatus.getGroup(), dstStatus.getGroup());
|
||||
Assert.assertEquals("AccessTime", srcStatus.getAccessTime(),
|
||||
dstStatus.getAccessTime());
|
||||
Assert.assertEquals("ModificationTime", srcStatus.getModificationTime(),
|
||||
dstStatus.getModificationTime());
|
||||
Assert.assertEquals("Replication", srcStatus.getReplication(),
|
||||
dstStatus.getReplication());
|
||||
assertStatusEqual(fs, dest, srcStatus);
|
||||
|
||||
Assert.assertArrayEquals(en1.toArray(), dd2.toArray());
|
||||
}
|
||||
|
||||
@ -486,12 +560,7 @@ public void testPreserveNothingOnFile() throws IOException {
|
||||
CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));
|
||||
|
||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||
Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
|
||||
Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
|
||||
Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
|
||||
Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
|
||||
Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
|
||||
Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
|
||||
assertStatusNotEqual(fs, dst, srcStatus);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -842,13 +911,7 @@ public void testPreserveOnFileUpwardRecursion() throws IOException {
|
||||
|
||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||
// attributes of src -> f2 ? should be yes
|
||||
CopyListingFileStatus f2Status = new CopyListingFileStatus(fs.getFileStatus(f2));
|
||||
Assert.assertTrue(srcStatus.getPermission().equals(f2Status.getPermission()));
|
||||
Assert.assertTrue(srcStatus.getOwner().equals(f2Status.getOwner()));
|
||||
Assert.assertTrue(srcStatus.getGroup().equals(f2Status.getGroup()));
|
||||
Assert.assertTrue(srcStatus.getAccessTime() == f2Status.getAccessTime());
|
||||
Assert.assertTrue(srcStatus.getModificationTime() == f2Status.getModificationTime());
|
||||
Assert.assertTrue(srcStatus.getReplication() == f2Status.getReplication());
|
||||
assertStatusEqual(fs, f2, srcStatus);
|
||||
|
||||
// attributes of src -> f1 ? should be no
|
||||
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
||||
@ -1047,13 +1110,7 @@ public void testPreserveOnFileDownwardRecursion() throws IOException {
|
||||
|
||||
// FileStatus.equals only compares path field, must explicitly compare all fields
|
||||
// attributes of src -> f0 ? should be yes
|
||||
CopyListingFileStatus f0Status = new CopyListingFileStatus(fs.getFileStatus(f0));
|
||||
Assert.assertTrue(srcStatus.getPermission().equals(f0Status.getPermission()));
|
||||
Assert.assertTrue(srcStatus.getOwner().equals(f0Status.getOwner()));
|
||||
Assert.assertTrue(srcStatus.getGroup().equals(f0Status.getGroup()));
|
||||
Assert.assertTrue(srcStatus.getAccessTime() == f0Status.getAccessTime());
|
||||
Assert.assertTrue(srcStatus.getModificationTime() == f0Status.getModificationTime());
|
||||
Assert.assertTrue(srcStatus.getReplication() == f0Status.getReplication());
|
||||
assertStatusEqual(fs, f0, srcStatus);
|
||||
|
||||
// attributes of src -> f1 ? should be no
|
||||
CopyListingFileStatus f1Status = new CopyListingFileStatus(fs.getFileStatus(f1));
|
||||
|
Loading…
Reference in New Issue
Block a user