From ee0c722dc8fb81ec902cd1da5958ce5adb0ab08f Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 29 Sep 2016 16:59:33 +0100 Subject: [PATCH] HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan. --- .../contract/AbstractFSContractTestBase.java | 2 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 63 +++++++------- .../hadoop/fs/s3a/S3AInstrumentation.java | 10 +++ .../org/apache/hadoop/fs/s3a/Statistic.java | 4 + .../fs/s3a/ITestS3AFileOperationCost.java | 85 +++++++++++++++++++ .../apache/hadoop/fs/s3a/S3ATestUtils.java | 13 ++- 6 files changed, 143 insertions(+), 34 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java index baea968b67..b2e68f5316 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java @@ -359,7 +359,7 @@ protected void assertMinusOne(String text, int result) { assertEquals(text + " wrong read result " + result, -1, result); } - boolean rename(Path src, Path dst) throws IOException { + protected boolean rename(Path src, Path dst) throws IOException { return getFileSystem().rename(src, dst); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index dffef15100..e3b2c63907 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -668,7 +668,7 @@ private boolean innerRename(Path src, Path dst) throws IOException, copyFile(summary.getKey(), newDstKey, summary.getSize()); if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) { - removeKeys(keysToDelete, true); + removeKeys(keysToDelete, true, false); } } @@ -676,7 +676,7 @@ private boolean innerRename(Path src, Path dst) throws IOException, objects = continueListObjects(objects); } else { if (!keysToDelete.isEmpty()) { - removeKeys(keysToDelete, false); + removeKeys(keysToDelete, false, false); } break; } @@ -924,17 +924,25 @@ public void incrementPutProgressStatistics(String key, long bytes) { * @param keysToDelete collection of keys to delete on the s3-backend * @param clearKeys clears the keysToDelete-list after processing the list * when set to true + * @param deleteFakeDir indicates whether this is for deleting fake dirs */ private void removeKeys(List keysToDelete, - boolean clearKeys) throws AmazonClientException { + boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException { + if (keysToDelete.isEmpty()) { + // no keys + return; + } if (enableMultiObjectsDelete) { deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete)); - instrumentation.fileDeleted(keysToDelete.size()); } else { for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { deleteObject(keyVersion.getKey()); } + } + if (!deleteFakeDir) { instrumentation.fileDeleted(keysToDelete.size()); + } else { + instrumentation.fakeDirsDeleted(keysToDelete.size()); } if (clearKeys) { keysToDelete.clear(); @@ -1017,7 +1025,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive) LOG.debug("Got object to delete {}", summary.getKey()); if (keys.size() == MAX_ENTRIES_TO_DELETE) { - removeKeys(keys, true); + removeKeys(keys, true, false); } } @@ -1025,7 +1033,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive) objects = continueListObjects(objects); } else { if (!keys.isEmpty()) { - removeKeys(keys, false); + removeKeys(keys, false, false); } break; } @@ -1504,37 +1512,30 @@ public void finishedWrite(String key) { /** * Delete mock parent directories which are no longer needed. * This code swallows IO exceptions encountered - * @param f path + * @param path path */ - private void deleteUnnecessaryFakeDirectories(Path f) { - while (true) { - String key = ""; - try { - key = pathToKey(f); - if (key.isEmpty()) { - break; + private void deleteUnnecessaryFakeDirectories(Path path) { + List keysToRemove = new ArrayList<>(); + while (!path.isRoot()) { + String key = pathToKey(path); + key = (key.endsWith("/")) ? key : (key + "/"); + keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key)); + path = path.getParent(); + } + try { + removeKeys(keysToRemove, false, true); + } catch(AmazonClientException e) { + instrumentation.errorIgnored(); + if (LOG.isDebugEnabled()) { + StringBuilder sb = new StringBuilder(); + for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) { + sb.append(kv.getKey()).append(","); } - - S3AFileStatus status = getFileStatus(f); - - if (status.isDirectory() && status.isEmptyDirectory()) { - LOG.debug("Deleting fake directory {}/", key); - deleteObject(key + "/"); - } - } catch (IOException | AmazonClientException e) { - LOG.debug("While deleting key {} ", key, e); - instrumentation.errorIgnored(); + LOG.debug("While deleting keys {} ", sb.toString(), e); } - - if (f.isRoot()) { - break; - } - - f = f.getParent(); } } - private void createFakeDirectory(final String objectName) throws AmazonClientException, AmazonServiceException, InterruptedIOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java index b4c4063976..26b5b51b6c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java @@ -75,6 +75,7 @@ public class S3AInstrumentation { private final MutableCounterLong numberOfFilesCopied; private final MutableCounterLong bytesOfFilesCopied; private final MutableCounterLong numberOfFilesDeleted; + private final MutableCounterLong numberOfFakeDirectoryDeletes; private final MutableCounterLong numberOfDirectoriesCreated; private final MutableCounterLong numberOfDirectoriesDeleted; private final Map streamMetrics = @@ -135,6 +136,7 @@ public S3AInstrumentation(URI name) { numberOfFilesCopied = counter(FILES_COPIED); bytesOfFilesCopied = counter(FILES_COPIED_BYTES); numberOfFilesDeleted = counter(FILES_DELETED); + numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED); numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED); numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED); ignoredErrors = counter(IGNORED_ERRORS); @@ -295,6 +297,14 @@ public void fileDeleted(int count) { numberOfFilesDeleted.incr(count); } + /** + * Indicate that fake directory request was made. + * @param count number of directory entries included in the delete request. + */ + public void fakeDirsDeleted(int count) { + numberOfFakeDirectoryDeletes.incr(count); + } + /** * Indicate that S3A created a directory. */ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java index cbc34d665d..d84a355991 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java @@ -42,6 +42,10 @@ public enum Statistic { "Total number of files created through the object store."), FILES_DELETED("files_deleted", "Total number of files deleted from the object store."), + FAKE_DIRECTORIES_CREATED("fake_directories_created", + "Total number of fake directory entries created in the object store."), + FAKE_DIRECTORIES_DELETED("fake_directories_deleted", + "Total number of fake directory deletes submitted to object store."), IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"), INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE, "Calls of copyFromLocalFile()"), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index 2a6ba0c3b2..f19ea954bc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -188,4 +188,89 @@ public void testCostOfCopyFromLocalFile() throws Throwable { tmpFile.delete(); } } + + private void reset(MetricDiff... diffs) { + for (MetricDiff diff : diffs) { + diff.reset(); + } + } + + @Test + public void testFakeDirectoryDeletion() throws Throwable { + describe("Verify whether create file works after renaming a file. " + + "In S3, rename deletes any fake directories as a part of " + + "clean up activity"); + S3AFileSystem fs = getFileSystem(); + Path srcBaseDir = path("src"); + mkdirs(srcBaseDir); + MetricDiff deleteRequests = + new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS); + MetricDiff directoriesDeleted = + new MetricDiff(fs, Statistic.DIRECTORIES_DELETED); + MetricDiff fakeDirectoriesDeleted = + new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED); + MetricDiff directoriesCreated = + new MetricDiff(fs, Statistic.DIRECTORIES_CREATED); + + Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6"); + Path srcFilePath = new Path(srcDir, "source.txt"); + int srcDirDepth = directoriesInPath(srcDir); + // one dir created, one removed + mkdirs(srcDir); + String state = "after mkdir(srcDir)"; + directoriesCreated.assertDiffEquals(state, 1); +/* TODO: uncomment once HADOOP-13222 is in + deleteRequests.assertDiffEquals(state, 1); + directoriesDeleted.assertDiffEquals(state, 0); + fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth); +*/ + reset(deleteRequests, directoriesCreated, directoriesDeleted, + fakeDirectoriesDeleted); + + // creating a file should trigger demise of the src dir + touch(fs, srcFilePath); + state = "after touch(fs, srcFilePath)"; + deleteRequests.assertDiffEquals(state, 1); + directoriesCreated.assertDiffEquals(state, 0); + directoriesDeleted.assertDiffEquals(state, 0); + fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth); + + reset(deleteRequests, directoriesCreated, directoriesDeleted, + fakeDirectoriesDeleted); + + Path destBaseDir = path("dest"); + Path destDir = new Path(destBaseDir, "1/2/3/4/5/6"); + Path destFilePath = new Path(destDir, "dest.txt"); + mkdirs(destDir); + state = "after mkdir(destDir)"; + + int destDirDepth = directoriesInPath(destDir); + directoriesCreated.assertDiffEquals(state, 1); +/* TODO: uncomment once HADOOP-13222 is in + deleteRequests.assertDiffEquals(state,1); + directoriesDeleted.assertDiffEquals(state,0); + fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth); +*/ + reset(deleteRequests, directoriesCreated, directoriesDeleted, + fakeDirectoriesDeleted); + + fs.rename(srcFilePath, destFilePath); + state = "after rename(srcFilePath, destFilePath)"; + directoriesCreated.assertDiffEquals(state, 1); + // one for the renamed file, one for the parent + deleteRequests.assertDiffEquals(state, 2); + directoriesDeleted.assertDiffEquals(state, 0); + fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth); + + reset(deleteRequests, directoriesCreated, directoriesDeleted, + fakeDirectoriesDeleted); + + assertIsFile(destFilePath); + assertIsDirectory(srcDir); + } + + private int directoriesInPath(Path path) { + return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent()); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index e45db483e7..95f6d4b86f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -296,13 +296,22 @@ public String toString() { return sb.toString(); } + /** + * Assert that the value of {@link #diff()} matches that expected. + * @param message message to print; metric name is appended + * @param expected expected value. + */ + public void assertDiffEquals(String message, long expected) { + Assert.assertEquals(message + ": " + statistic.getSymbol(), + expected, diff()); + } + /** * Assert that the value of {@link #diff()} matches that expected. * @param expected expected value. */ public void assertDiffEquals(long expected) { - Assert.assertEquals("Count of " + this, - expected, diff()); + assertDiffEquals("Count of " + this, expected); } /**