From 94da630cd2d1e056393f83acdac2747cf4b12bad Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Tue, 14 Apr 2020 21:49:51 +0530 Subject: [PATCH] HADOOP-16465 listLocatedStatus() optimisation (#1943) Contributed by Mukund Thakur Optimize S3AFileSystem.listLocatedStatus() to perform list operations directly and then fallback to head checks for files Change-Id: Ia2c0fa6fcc5967c49b914b92f41135d07dab0464 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 87 ++++++++++++------- .../fs/s3a/ITestS3AFileOperationCost.java | 57 ++++++++++++ 2 files changed, 114 insertions(+), 30 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 9bd33a48df..9630a9eff7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4283,41 +4283,68 @@ public RemoteIterator listLocatedStatus(final Path f, RemoteIterator iterator = once("listLocatedStatus", path.toString(), () -> { - // lookup dir triggers existence check - final S3AFileStatus fileStatus = - (S3AFileStatus) getFileStatus(path); - if (fileStatus.isFile()) { - // simple case: File - LOG.debug("Path is a file"); - return new Listing.SingleStatusRemoteIterator( - filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); - } else { - // directory: trigger a lookup - final String key = maybeAddTrailingSlash(pathToKey(path)); - final Listing.FileStatusAcceptor acceptor = - new Listing.AcceptAllButSelfAndS3nDirs(path); - boolean allowAuthoritative = allowAuthoritative(f); - DirListingMetadata meta = - S3Guard.listChildrenWithTtl(metadataStore, path, - ttlTimeProvider, allowAuthoritative); - final RemoteIterator cachedFileStatusIterator = - listing.createProvidedFileStatusIterator( - S3Guard.dirMetaToStatuses(meta), filter, acceptor); - return (allowAuthoritative && meta != null - && meta.isAuthoritative()) - ? listing.createLocatedFileStatusIterator( - cachedFileStatusIterator) - : listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, "/"), - filter, - acceptor, - cachedFileStatusIterator)); + // Assuming the path to be a directory, + // trigger a list call directly. + final RemoteIterator + locatedFileStatusIteratorForDir = + getLocatedFileStatusIteratorForDir(path, filter); + + // If no listing is present then path might be a file. + if (!locatedFileStatusIteratorForDir.hasNext()) { + final S3AFileStatus fileStatus = + (S3AFileStatus) getFileStatus(path); + if (fileStatus.isFile()) { + // simple case: File + LOG.debug("Path is a file"); + return new Listing.SingleStatusRemoteIterator( + filter.accept(path) + ? toLocatedFileStatus(fileStatus) + : null); + } } + // Either empty or non-empty directory. + return locatedFileStatusIteratorForDir; }); return toLocatedFileStatusIterator(iterator); } + /** + * Generate list located status for a directory. + * Also performing tombstone reconciliation for guarded directories. + * @param dir directory to check. + * @param filter a path filter. + * @return an iterator that traverses statuses of the given dir. + * @throws IOException in case of failure. + */ + private RemoteIterator getLocatedFileStatusIteratorForDir( + Path dir, PathFilter filter) throws IOException { + final String key = maybeAddTrailingSlash(pathToKey(dir)); + final Listing.FileStatusAcceptor acceptor = + new Listing.AcceptAllButSelfAndS3nDirs(dir); + boolean allowAuthoritative = allowAuthoritative(dir); + DirListingMetadata meta = + S3Guard.listChildrenWithTtl(metadataStore, dir, + ttlTimeProvider, allowAuthoritative); + Set tombstones = meta != null + ? meta.listTombstones() + : null; + final RemoteIterator cachedFileStatusIterator = + listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), filter, acceptor); + return (allowAuthoritative && meta != null + && meta.isAuthoritative()) + ? listing.createLocatedFileStatusIterator( + cachedFileStatusIterator) + : listing.createTombstoneReconcilingIterator( + listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(dir, + createListObjectsRequest(key, "/"), + filter, + acceptor, + cachedFileStatusIterator)), + tombstones); + } + /** * Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance. * @param status file status diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index e2f7fead46..b2b983c4d4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -111,6 +111,63 @@ public void setup() throws Exception { skipDuringFaultInjection(fs); } + @Test + public void testCostOfLocatedFileStatusOnFile() throws Throwable { + describe("performing listLocatedStatus on a file"); + Path file = path(getMethodName() + ".txt"); + S3AFileSystem fs = getFileSystem(); + touch(fs, file); + resetMetricDiffs(); + fs.listLocatedStatus(file); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + metadataRequests.assertDiffEquals(1); + } + listRequests.assertDiffEquals(1); + } + + @Test + public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable { + describe("performing listLocatedStatus on an empty dir"); + Path dir = path(getMethodName()); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + resetMetricDiffs(); + fs.listLocatedStatus(dir); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + verifyOperationCount(2, 1); + } else { + if (fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + + @Test + public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable { + describe("performing listLocatedStatus on a non empty dir"); + Path dir = path(getMethodName() + "dir"); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + Path file = new Path(dir, "file.txt"); + touch(fs, file); + resetMetricDiffs(); + fs.listLocatedStatus(dir); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + verifyOperationCount(0, 1); + } else { + if(fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + @Test public void testCostOfGetFileStatusOnFile() throws Throwable { describe("performing getFileStatus on a file");