diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 3a2f9d9064..bee2e5810c 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -353,6 +353,12 @@
hadoop-distcp
${project.version}
+
+ org.apache.hadoop
+ hadoop-distcp
+ ${project.version}
+ test-jar
+
org.apache.hadoop
hadoop-datajoin
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index dfcb1b0fbe..c95f1e6cd9 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -252,5 +252,16 @@
test
jar
+
+ org.apache.hadoop
+ hadoop-distcp
+ test
+
+
+ org.apache.hadoop
+ hadoop-distcp
+ test
+ test-jar
+
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 95e3274dd7..fe81400d3d 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -731,6 +731,15 @@ or in batch runs.
Smaller values should result in faster test runs, especially when the object
store is a long way away.
+DistCp tests targeting S3A support a configurable file size. The default is
+10 MB, but the configuration value is expressed in KB so that it can be tuned
+smaller to achieve faster test runs.
+
+    <property>
+      <name>scale.test.distcp.file.size.kb</name>
+      <value>10240</value>
+    </property>
+
### Running the Tests
After completing the configuration, execute the test run through Maven.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java
new file mode 100644
index 0000000000..7eb0afab13
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.contract.s3a;
+
+import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
+import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
+
+/**
+ * Runs the DistCp contract test suite with S3A as the remote file system.
+ */
+public class TestS3AContractDistCp extends AbstractContractDistCpTest {
+
+  private static final long MULTIPART_SETTING = 8 * 1024 * 1024; // 8 MB
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration s3aConf = super.createConfiguration();
+    s3aConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
+    s3aConf.setLong(MIN_MULTIPART_THRESHOLD, MULTIPART_SETTING);
+    return s3aConf;
+  }
+
+  @Override
+  protected S3AContract createContract(Configuration conf) {
+    return new S3AContract(conf);
+  }
+}
diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml
index 8344ed719f..02a1240141 100644
--- a/hadoop-tools/hadoop-azure/pom.xml
+++ b/hadoop-tools/hadoop-azure/pom.xml
@@ -193,6 +193,25 @@
test-jar
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-jobclient
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-distcp
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-distcp
+ test
+ test-jar
+
+
org.mockito
mockito-all
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java
new file mode 100644
index 0000000000..a3750d46ab
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azure.contract;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
+
+/** Runs the shared DistCp contract test suite against WASB. */
+public class TestAzureNativeContractDistCp extends AbstractContractDistCpTest {
+
+  @Override
+  protected NativeAzureFileSystemContract createContract(Configuration conf) {
+    NativeAzureFileSystemContract wasbContract =
+        new NativeAzureFileSystemContract(conf);
+    return wasbContract;
+  }
+}
diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml
index 4ea38c302c..2cec22f0ad 100644
--- a/hadoop-tools/hadoop-distcp/pom.xml
+++ b/hadoop-tools/hadoop-distcp/pom.xml
@@ -186,6 +186,22 @@
+
+
+ prepare-jar
+ prepare-package
+
+ jar
+
+
+
+ prepare-test-jar
+ prepare-package
+
+ test-jar
+
+
+
org.apache.maven.plugins
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
index d3d7677ecf..1784c5de51 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
@@ -195,9 +195,13 @@ public static void preserve(FileSystem targetFS, Path path,
EnumSet attributes,
boolean preserveRawXattrs) throws IOException {
- FileStatus targetFileStatus = targetFS.getFileStatus(path);
- String group = targetFileStatus.getGroup();
- String user = targetFileStatus.getOwner();
+ // If not preserving anything from FileStatus, don't bother fetching it.
+ FileStatus targetFileStatus = attributes.isEmpty() ? null :
+ targetFS.getFileStatus(path);
+ String group = targetFileStatus == null ? null :
+ targetFileStatus.getGroup();
+ String user = targetFileStatus == null ? null :
+ targetFileStatus.getOwner();
boolean chown = false;
if (attributes.contains(FileAttribute.ACL)) {
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
new file mode 100644
index 0000000000..a4f50c71ab
--- /dev/null
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.contract;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.tools.DistCp;
+import org.apache.hadoop.tools.DistCpOptions;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+/**
+ * Contract test suite covering a file system's integration with DistCp. The
+ * tests coordinate two file system instances: one "local", which is the local
+ * file system, and the other "remote", which is the file system implementation
+ * under test. The tests in the suite cover both copying from local to remote
+ * (e.g. a backup use case) and copying from remote to local (e.g. a restore use
+ * case).
+ */
+public abstract class AbstractContractDistCpTest
+ extends AbstractFSContractTestBase {
+
+ @Rule // provides the running test's method name via getMethodName()
+ public TestName testName = new TestName();
+ // Per-test fixtures below are all assigned in setup().
+ private Configuration conf;
+ private FileSystem localFS, remoteFS;
+ private Path localDir, remoteDir;
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration testConf = new Configuration();
+    testConf.set("mapred.job.tracker", "local");
+    return testConf;
+  }
+
+  @Before
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    conf = getContract().getConf();
+    localFS = FileSystem.getLocal(conf);
+    remoteFS = getFileSystem();
+    // Every test method gets its own <SubclassName>/<methodName> directory on
+    // both file systems. The directories are qualified (scheme included)
+    // rather than relying on the default file system, so that a failure
+    // message shows whether a path is local or remote.
+    String suiteName = getClass().getSimpleName();
+    Path perTestDir = new Path(suiteName, testName.getMethodName());
+    Path localRoot = new Path(GenericTestUtils.getTestDir().toURI());
+    localDir = localFS.makeQualified(new Path(localRoot, perTestDir));
+    mkdirs(localFS, localDir);
+    remoteDir = remoteFS.makeQualified(
+        new Path(getContract().getTestPath(), perTestDir));
+    mkdirs(remoteFS, remoteDir);
+  }
+
+ @Test // source: local file system; destination: file system under test
+ public void deepDirectoryStructureToRemote() throws Exception {
+ describe("copy a deep directory structure from local to remote");
+ deepDirectoryStructure(localFS, localDir, remoteFS, remoteDir);
+ }
+
+ @Test // source: local file system; destination: file system under test
+ public void largeFilesToRemote() throws Exception {
+ describe("copy multiple large files from local to remote");
+ largeFiles(localFS, localDir, remoteFS, remoteDir);
+ }
+
+ @Test // source: file system under test; destination: local file system
+ public void deepDirectoryStructureFromRemote() throws Exception {
+ describe("copy a deep directory structure from remote to local");
+ deepDirectoryStructure(remoteFS, remoteDir, localFS, localDir);
+ }
+
+ @Test // source: file system under test; destination: local file system
+ public void largeFilesFromRemote() throws Exception {
+ describe("copy multiple large files from remote to local");
+ largeFiles(remoteFS, remoteDir, localFS, localDir);
+ }
+
+  /**
+   * Copies a small multi-level directory tree with DistCp and verifies it.
+   *
+   * @param srcFS file system holding the tree to copy
+   * @param srcDir directory on srcFS under which the tree is created
+   * @param dstFS file system receiving the copy
+   * @param dstDir directory on dstFS under which the copy is written
+   * @throws Exception if setup, the DistCp job, or verification fails
+   */
+  private void deepDirectoryStructure(FileSystem srcFS, Path srcDir,
+      FileSystem dstFS, Path dstDir) throws Exception {
+    // Source layout: inputDir/{file1, subDir1/file2, subDir2/subDir3/file3}.
+    Path srcRoot = new Path(srcDir, "inputDir");
+    Path shallowDir = new Path(srcRoot, "subDir1");
+    Path nestedDir = new Path(srcRoot, "subDir2/subDir3");
+    mkdirs(srcFS, shallowDir);
+    mkdirs(srcFS, nestedDir);
+    byte[] topBytes = dataset(100, 33, 43);
+    createFile(srcFS, new Path(srcRoot, "file1"), true, topBytes);
+    byte[] shallowBytes = dataset(200, 43, 53);
+    createFile(srcFS, new Path(shallowDir, "file2"), true, shallowBytes);
+    byte[] nestedBytes = dataset(300, 53, 63);
+    createFile(srcFS, new Path(nestedDir, "file3"), true, nestedBytes);
+
+    Path dstRoot = new Path(dstDir, "outputDir");
+    runDistCp(srcRoot, dstRoot);
+    // The expected paths keep the "inputDir" level beneath the target.
+    ContractTestUtils.assertIsDirectory(dstFS, dstRoot);
+    verifyFileContents(dstFS, new Path(dstRoot, "inputDir/file1"), topBytes);
+    verifyFileContents(dstFS,
+        new Path(dstRoot, "inputDir/subDir1/file2"), shallowBytes);
+    verifyFileContents(dstFS,
+        new Path(dstRoot, "inputDir/subDir2/subDir3/file3"), nestedBytes);
+  }
+
+  /**
+   * Copies three large files with DistCp and verifies their contents. The
+   * base size is read in KB from scale.test.distcp.file.size.kb (default
+   * 10 MB) and truncated to whole MB; the files are 1, 2 and 3 MB larger.
+   *
+   * @param srcFS source FileSystem
+   * @param srcDir source directory
+   * @param dstFS destination FileSystem
+   * @param dstDir destination directory
+   * @throws Exception if there is a failure
+   */
+  private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
+      Path dstDir) throws Exception {
+    Path inputDir = new Path(srcDir, "inputDir");
+    mkdirs(srcFS, inputDir);
+    int fileSizeKb = conf.getInt("scale.test.distcp.file.size.kb", 10 * 1024);
+    // KB -> MB divides by 1024; multiplying would overflow the sizes below.
+    int fileSizeMb = fileSizeKb / 1024;
+    getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb);
+    byte[] data1 = dataset((fileSizeMb + 1) * 1024 * 1024, 33, 43);
+    createFile(srcFS, new Path(inputDir, "file1"), true, data1);
+    byte[] data2 = dataset((fileSizeMb + 2) * 1024 * 1024, 43, 53);
+    createFile(srcFS, new Path(inputDir, "file2"), true, data2);
+    byte[] data3 = dataset((fileSizeMb + 3) * 1024 * 1024, 53, 63);
+    createFile(srcFS, new Path(inputDir, "file3"), true, data3);
+    Path target = new Path(dstDir, "outputDir");
+    runDistCp(inputDir, target);
+    ContractTestUtils.assertIsDirectory(dstFS, target);
+    verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
+    verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2);
+    verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3);
+  }
+
+  /**
+   * Runs a DistCp job from src to dst and asserts that it succeeded.
+   *
+   * @param src path to copy from
+   * @param dst path to copy to
+   * @throws Exception if the job cannot be run or an assertion fails
+   */
+  private void runDistCp(Path src, Path dst) throws Exception {
+    DistCpOptions copyOptions = new DistCpOptions(Arrays.asList(src), dst);
+    Job job = new DistCp(conf, copyOptions).execute();
+    assertNotNull("Unexpected null job returned from DistCp execution.", job);
+    assertTrue("DistCp job did not complete.", job.isComplete());
+    assertTrue("DistCp job did not complete successfully.", job.isSuccessful());
+  }
+
+  /**
+   * Creates a directory via fs.mkdirs, asserting that the call returns true.
+   * @param fs FileSystem in which to create the directory
+   * @param dir path of the directory to create
+   * @throws Exception if the mkdirs call itself fails
+   */
+  private static void mkdirs(FileSystem fs, Path dir) throws Exception {
+    boolean created = fs.mkdirs(dir);
+    assertTrue("Failed to mkdir " + dir, created);
+  }
+}