From 84e999b35c12126daaf022537f4d44caef61f145 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 23 Feb 2023 00:59:41 +0530 Subject: [PATCH] HADOOP-18582. Addendum: Skip unnecessary cleanup logic in DistCp. (#5409) Followup to the original HADOOP-18582. Temporary path cleanup is re-enabled for -append jobs as these will create temporary files when creating or overwriting files. Contributed by Ayush Saxena --- .../apache/hadoop/tools/mapred/CopyCommitter.java | 5 +---- .../hadoop/tools/mapred/TestCopyCommitter.java | 12 +++++------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index e5c74094e9..1cafb23feb 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -153,10 +153,7 @@ private void cleanupTempFiles(JobContext context) { final boolean directWrite = conf.getBoolean( DistCpOptionSwitch.DIRECT_WRITE.getConfigLabel(), false); - final boolean append = conf.getBoolean( - DistCpOptionSwitch.APPEND.getConfigLabel(), false); - final boolean useTempTarget = !append && !directWrite; - if (!useTempTarget) { + if (directWrite) { return; } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java index f2dd246db5..6a537dc6e7 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java @@ -586,13 +586,11 @@ fs, new Path(targetBase + srcFilename), @Test public void testCommitWithCleanupTempFiles() throws IOException { - testCommitWithCleanup(true, false); - testCommitWithCleanup(false, true); - testCommitWithCleanup(true, true); - testCommitWithCleanup(false, false); + testCommitWithCleanup(true); + testCommitWithCleanup(false); } - private void testCommitWithCleanup(boolean append, boolean directWrite)throws IOException { + private void testCommitWithCleanup(boolean directWrite) throws IOException { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobID jobID = taskAttemptContext.getTaskAttemptID().getJobID(); JobContext jobContext = new JobContextImpl( @@ -611,7 +609,7 @@ private void testCommitWithCleanup(boolean append, boolean directWrite)throws IO DistCpOptions options = new DistCpOptions.Builder( Collections.singletonList(new Path(sourceBase)), new Path("/out")) - .withAppend(append) + .withAppend(true) .withSyncFolder(true) .withDirectWrite(directWrite) .build(); @@ -631,7 +629,7 @@ private void testCommitWithCleanup(boolean append, boolean directWrite)throws IO null, taskAttemptContext); committer.commitJob(jobContext); - if (append || directWrite) { + if (directWrite) { ContractTestUtils.assertPathExists(fs, "Temp files should not be cleanup with append or direct option", tempFilePath); } else {