HADOOP-15273. distcp can't handle remote stores with different checksum algorithms.

Contributed by Steve Loughran.
This commit is contained in:
Steve Loughran 2018-03-08 11:24:06 +00:00
parent 3bd6b1fd85
commit 7ef4d942dd
3 changed files with 29 additions and 19 deletions

View File

@ -534,11 +534,6 @@ private void validate() {
+ "mutually exclusive"); + "mutually exclusive");
} }
if (!syncFolder && skipCRC) {
throw new IllegalArgumentException(
"Skip CRC is valid only with update options");
}
if (!syncFolder && append) { if (!syncFolder && append) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"Append is valid only with update options"); "Append is valid only with update options");

View File

@ -210,15 +210,30 @@ private void compareCheckSums(FileSystem sourceFS, Path source,
throws IOException { throws IOException {
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum, if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
targetFS, target)) { targetFS, target)) {
StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ") StringBuilder errorMessage =
new StringBuilder("Checksum mismatch between ")
.append(source).append(" and ").append(target).append("."); .append(source).append(" and ").append(target).append(".");
if (sourceFS.getFileStatus(source).getBlockSize() != boolean addSkipHint = false;
String srcScheme = sourceFS.getScheme();
String targetScheme = targetFS.getScheme();
if (!srcScheme.equals(targetScheme)
&& !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
// the filesystems are different and they aren't both hdfs connectors
errorMessage.append("Source and destination filesystems are of"
+ " different types\n")
.append("Their checksum algorithms may be incompatible");
addSkipHint = true;
} else if (sourceFS.getFileStatus(source).getBlockSize() !=
targetFS.getFileStatus(target).getBlockSize()) { targetFS.getFileStatus(target).getBlockSize()) {
errorMessage.append(" Source and target differ in block-size.") errorMessage.append(" Source and target differ in block-size.\n")
.append(" Use -pb to preserve block-sizes during copy.") .append(" Use -pb to preserve block-sizes during copy.");
.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.") addSkipHint = true;
}
if (addSkipHint) {
errorMessage.append(" You can skip checksum-checks altogether "
+ " with -skipcrccheck.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of " + .append(" (NOTE: By skipping checksums, one runs the risk of " +
"masking data-corruption during file-transfer.)"); "masking data-corruption during file-transfer.)\n");
} }
throw new IOException(errorMessage.toString()); throw new IOException(errorMessage.toString());
} }

View File

@ -45,6 +45,7 @@
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptionSwitch; import org.apache.hadoop.tools.DistCpOptionSwitch;
@ -937,7 +938,7 @@ public void testPreserveBlockSizeAndReplication() {
} }
@Test(timeout=40000) @Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() { public void testCopyFailOnBlockSizeDifference() throws Exception {
try { try {
deleteState(); deleteState();
createSourceDataWithDifferentBlockSize(); createSourceDataWithDifferentBlockSize();
@ -964,12 +965,11 @@ public void testCopyFailOnBlockSizeDifference() {
Assert.fail("Copy should have failed because of block-size difference."); Assert.fail("Copy should have failed because of block-size difference.");
} }
catch (Exception exception) { catch (IOException exception) {
// Check that the exception suggests the use of -pb/-skipCrc. // Check that the exception suggests the use of -pb/-skipcrccheck.
Assert.assertTrue("Failure exception should have suggested the use of -pb.", Throwable cause = exception.getCause().getCause();
exception.getCause().getCause().getMessage().contains("pb")); GenericTestUtils.assertExceptionContains("-pb", cause);
Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", GenericTestUtils.assertExceptionContains("-skipcrccheck", cause);
exception.getCause().getCause().getMessage().contains("skipCrc"));
} }
} }