diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index 900ce6296d..fb7ace5f1e 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -65,7 +65,8 @@ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths. */ public class SimpleCopyListing extends CopyListing { - private static final Logger LOG = LoggerFactory.getLogger(SimpleCopyListing.class); + public static final Logger LOG = + LoggerFactory.getLogger(SimpleCopyListing.class); public static final int DEFAULT_FILE_STATUS_SIZE = 1000; public static final boolean DEFAULT_RANDOMIZE_FILE_LISTING = true; @@ -601,7 +602,7 @@ public WorkReport processItem( } private void printStats() { - LOG.info("Paths (files+dirs) cnt = {}; dirCnt = ", totalPaths, totalDirs); + LOG.info("Paths (files+dirs) cnt = {}; dirCnt = {}", totalPaths, totalDirs); } private void maybePrintStats() { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index e346d0b938..33ab3ee4eb 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -564,12 +564,15 @@ private Path listTargetFiles(final Configuration conf, // thread count is picked up from the job int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS, DistCpConstants.DEFAULT_LISTSTATUS_THREADS); + boolean useIterator = + conf.getBoolean(DistCpConstants.CONF_LABEL_USE_ITERATOR, false); LOG.info("Scanning destination directory {} with thread count: {}", targetFinalPath, threads); DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath) .withOverwrite(overwrite) .withSyncFolder(syncFolder) .withNumListstatusThreads(threads) + .withUseIterator(useIterator) .build(); DistCpContext distCpContext = new DistCpContext(options); distCpContext.setTargetPathExists(targetPathExists); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index 202ead69a1..fab14d138b 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -42,6 +42,7 @@ import org.apache.hadoop.tools.DistCp; import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.tools.DistCpOptions; +import org.apache.hadoop.tools.SimpleCopyListing; import org.apache.hadoop.tools.mapred.CopyMapper; import org.apache.hadoop.tools.util.DistCpTestUtils; import org.apache.hadoop.util.functional.RemoteIterators; @@ -628,11 +629,17 @@ public void testDistCpWithIterator() throws Exception { GenericTestUtils .createFiles(remoteFS, source, getDepth(), getWidth(), getWidth()); - DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest.toString(), "-useiterator", conf); + GenericTestUtils.LogCapturer log = + GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG); - Assertions - .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest.toString(), "-useiterator -update -delete", conf); + + // Check the target listing was also done using iterator. + Assertions.assertThat(log.getOutput()).contains( + "Building listing using iterator mode for " + dest.toString()); + + Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true))) .describedAs("files").hasSize(getTotalFiles()); }