HADOOP-16465 listLocatedStatus() optimisation (#1943)

Contributed by Mukund Thakur

Optimize S3AFileSystem.listLocatedStatus() to perform list
operations directly and then fall back to HEAD checks for files.

Change-Id: Ia2c0fa6fcc5967c49b914b92f41135d07dab0464
This commit is contained in:
Mukund Thakur 2020-04-14 21:49:51 +05:30 committed by Steve Loughran
parent 4bd37f2283
commit 94da630cd2
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
2 changed files with 114 additions and 30 deletions

View File

@ -4283,23 +4283,51 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
RemoteIterator<? extends LocatedFileStatus> iterator = RemoteIterator<? extends LocatedFileStatus> iterator =
once("listLocatedStatus", path.toString(), once("listLocatedStatus", path.toString(),
() -> { () -> {
// lookup dir triggers existence check // Assuming the path to be a directory,
// trigger a list call directly.
final RemoteIterator<S3ALocatedFileStatus>
locatedFileStatusIteratorForDir =
getLocatedFileStatusIteratorForDir(path, filter);
// If no listing is present then path might be a file.
if (!locatedFileStatusIteratorForDir.hasNext()) {
final S3AFileStatus fileStatus = final S3AFileStatus fileStatus =
(S3AFileStatus) getFileStatus(path); (S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) { if (fileStatus.isFile()) {
// simple case: File // simple case: File
LOG.debug("Path is a file"); LOG.debug("Path is a file");
return new Listing.SingleStatusRemoteIterator( return new Listing.SingleStatusRemoteIterator(
filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); filter.accept(path)
} else { ? toLocatedFileStatus(fileStatus)
// directory: trigger a lookup : null);
final String key = maybeAddTrailingSlash(pathToKey(path)); }
}
// Either empty or non-empty directory.
return locatedFileStatusIteratorForDir;
});
return toLocatedFileStatusIterator(iterator);
}
/**
* Generate a located file status listing for a directory,
* also performing tombstone reconciliation for guarded directories.
* @param dir directory to check.
* @param filter a path filter.
* @return an iterator that traverses statuses of the given dir.
* @throws IOException in case of failure.
*/
private RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
Path dir, PathFilter filter) throws IOException {
final String key = maybeAddTrailingSlash(pathToKey(dir));
final Listing.FileStatusAcceptor acceptor = final Listing.FileStatusAcceptor acceptor =
new Listing.AcceptAllButSelfAndS3nDirs(path); new Listing.AcceptAllButSelfAndS3nDirs(dir);
boolean allowAuthoritative = allowAuthoritative(f); boolean allowAuthoritative = allowAuthoritative(dir);
DirListingMetadata meta = DirListingMetadata meta =
S3Guard.listChildrenWithTtl(metadataStore, path, S3Guard.listChildrenWithTtl(metadataStore, dir,
ttlTimeProvider, allowAuthoritative); ttlTimeProvider, allowAuthoritative);
Set<Path> tombstones = meta != null
? meta.listTombstones()
: null;
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator = final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
listing.createProvidedFileStatusIterator( listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), filter, acceptor); S3Guard.dirMetaToStatuses(meta), filter, acceptor);
@ -4307,15 +4335,14 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
&& meta.isAuthoritative()) && meta.isAuthoritative())
? listing.createLocatedFileStatusIterator( ? listing.createLocatedFileStatusIterator(
cachedFileStatusIterator) cachedFileStatusIterator)
: listing.createLocatedFileStatusIterator( : listing.createTombstoneReconcilingIterator(
listing.createFileStatusListingIterator(path, listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(dir,
createListObjectsRequest(key, "/"), createListObjectsRequest(key, "/"),
filter, filter,
acceptor, acceptor,
cachedFileStatusIterator)); cachedFileStatusIterator)),
} tombstones);
});
return toLocatedFileStatusIterator(iterator);
} }
/** /**

View File

@ -111,6 +111,63 @@ public void setup() throws Exception {
skipDuringFaultInjection(fs); skipDuringFaultInjection(fs);
} }
/**
 * Check the request counters after calling listLocatedStatus() on a path
 * that is a file.
 * When the filesystem has no metadata store, the metadata request diff
 * must be exactly 1; in every configuration the list request diff must
 * be exactly 1.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfLocatedFileStatusOnFile() throws Throwable {
  describe("performing listLocatedStatus on a file");
  final Path target = path(getMethodName() + ".txt");
  final S3AFileSystem s3a = getFileSystem();
  touch(s3a, target);
  // Only requests issued by the listing call itself are to be counted.
  resetMetricDiffs();
  s3a.listLocatedStatus(target);
  final boolean guarded = s3a.hasMetadataStore();
  if (!guarded) {
    // No metadata store: the metadata request counter is checked as well.
    metadataRequests.assertDiffEquals(1);
  }
  listRequests.assertDiffEquals(1);
}
/**
 * Check the operation counts after calling listLocatedStatus() on an
 * empty directory, for each of the three configurations: no metadata
 * store, metadata store authoritative for the path, and metadata store
 * which is not authoritative for the path.
 * NOTE(review): the two arguments of verifyOperationCount are presumably
 * (metadata/HEAD requests, list requests) - confirm against its definition.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on an empty dir");
  final Path emptyDir = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(emptyDir);
  // Only requests issued by the listing call itself are to be counted.
  resetMetricDiffs();
  s3a.listLocatedStatus(emptyDir);
  if (!s3a.hasMetadataStore()) {
    // No metadata store configured.
    verifyOperationCount(2, 1);
  } else if (s3a.allowAuthoritative(emptyDir)) {
    // Metadata store present and authoritative for this path.
    verifyOperationCount(0, 0);
  } else {
    // Metadata store present but not authoritative for this path.
    verifyOperationCount(0, 1);
  }
}
/**
 * Check the operation counts after calling listLocatedStatus() on a
 * directory containing a single file, for each of the three
 * configurations: no metadata store, metadata store authoritative for
 * the path, and metadata store which is not authoritative for the path.
 * NOTE(review): the two arguments of verifyOperationCount are presumably
 * (metadata/HEAD requests, list requests) - confirm against its definition.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on a non empty dir");
  final Path parent = path(getMethodName() + "dir");
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(parent);
  final Path child = new Path(parent, "file.txt");
  touch(s3a, child);
  // Only requests issued by the listing call itself are to be counted.
  resetMetricDiffs();
  s3a.listLocatedStatus(parent);
  if (!s3a.hasMetadataStore()) {
    // No metadata store configured.
    verifyOperationCount(0, 1);
  } else if (s3a.allowAuthoritative(parent)) {
    // Metadata store present and authoritative for this path.
    verifyOperationCount(0, 0);
  } else {
    // Metadata store present but not authoritative for this path.
    verifyOperationCount(0, 1);
  }
}
@Test @Test
public void testCostOfGetFileStatusOnFile() throws Throwable { public void testCostOfGetFileStatusOnFile() throws Throwable {
describe("performing getFileStatus on a file"); describe("performing getFileStatus on a file");