HADOOP-16465 listLocatedStatus() optimisation (#1943)
Contributed by Mukund Thakur. Optimize S3AFileSystem.listLocatedStatus() to perform list operations directly and then fall back to head checks for files. Change-Id: Ia2c0fa6fcc5967c49b914b92f41135d07dab0464
This commit is contained in:
parent
4bd37f2283
commit
94da630cd2
@ -4283,23 +4283,51 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
|
|||||||
RemoteIterator<? extends LocatedFileStatus> iterator =
|
RemoteIterator<? extends LocatedFileStatus> iterator =
|
||||||
once("listLocatedStatus", path.toString(),
|
once("listLocatedStatus", path.toString(),
|
||||||
() -> {
|
() -> {
|
||||||
// lookup dir triggers existence check
|
// Assuming the path to be a directory,
|
||||||
|
// trigger a list call directly.
|
||||||
|
final RemoteIterator<S3ALocatedFileStatus>
|
||||||
|
locatedFileStatusIteratorForDir =
|
||||||
|
getLocatedFileStatusIteratorForDir(path, filter);
|
||||||
|
|
||||||
|
// If no listing is present then path might be a file.
|
||||||
|
if (!locatedFileStatusIteratorForDir.hasNext()) {
|
||||||
final S3AFileStatus fileStatus =
|
final S3AFileStatus fileStatus =
|
||||||
(S3AFileStatus) getFileStatus(path);
|
(S3AFileStatus) getFileStatus(path);
|
||||||
if (fileStatus.isFile()) {
|
if (fileStatus.isFile()) {
|
||||||
// simple case: File
|
// simple case: File
|
||||||
LOG.debug("Path is a file");
|
LOG.debug("Path is a file");
|
||||||
return new Listing.SingleStatusRemoteIterator(
|
return new Listing.SingleStatusRemoteIterator(
|
||||||
filter.accept(path) ? toLocatedFileStatus(fileStatus) : null);
|
filter.accept(path)
|
||||||
} else {
|
? toLocatedFileStatus(fileStatus)
|
||||||
// directory: trigger a lookup
|
: null);
|
||||||
final String key = maybeAddTrailingSlash(pathToKey(path));
|
}
|
||||||
|
}
|
||||||
|
// Either empty or non-empty directory.
|
||||||
|
return locatedFileStatusIteratorForDir;
|
||||||
|
});
|
||||||
|
return toLocatedFileStatusIterator(iterator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate list located status for a directory.
|
||||||
|
* Also performing tombstone reconciliation for guarded directories.
|
||||||
|
* @param dir directory to check.
|
||||||
|
* @param filter a path filter.
|
||||||
|
* @return an iterator that traverses statuses of the given dir.
|
||||||
|
* @throws IOException in case of failure.
|
||||||
|
*/
|
||||||
|
private RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
|
||||||
|
Path dir, PathFilter filter) throws IOException {
|
||||||
|
final String key = maybeAddTrailingSlash(pathToKey(dir));
|
||||||
final Listing.FileStatusAcceptor acceptor =
|
final Listing.FileStatusAcceptor acceptor =
|
||||||
new Listing.AcceptAllButSelfAndS3nDirs(path);
|
new Listing.AcceptAllButSelfAndS3nDirs(dir);
|
||||||
boolean allowAuthoritative = allowAuthoritative(f);
|
boolean allowAuthoritative = allowAuthoritative(dir);
|
||||||
DirListingMetadata meta =
|
DirListingMetadata meta =
|
||||||
S3Guard.listChildrenWithTtl(metadataStore, path,
|
S3Guard.listChildrenWithTtl(metadataStore, dir,
|
||||||
ttlTimeProvider, allowAuthoritative);
|
ttlTimeProvider, allowAuthoritative);
|
||||||
|
Set<Path> tombstones = meta != null
|
||||||
|
? meta.listTombstones()
|
||||||
|
: null;
|
||||||
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
|
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
|
||||||
listing.createProvidedFileStatusIterator(
|
listing.createProvidedFileStatusIterator(
|
||||||
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
|
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
|
||||||
@ -4307,15 +4335,14 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
|
|||||||
&& meta.isAuthoritative())
|
&& meta.isAuthoritative())
|
||||||
? listing.createLocatedFileStatusIterator(
|
? listing.createLocatedFileStatusIterator(
|
||||||
cachedFileStatusIterator)
|
cachedFileStatusIterator)
|
||||||
: listing.createLocatedFileStatusIterator(
|
: listing.createTombstoneReconcilingIterator(
|
||||||
listing.createFileStatusListingIterator(path,
|
listing.createLocatedFileStatusIterator(
|
||||||
|
listing.createFileStatusListingIterator(dir,
|
||||||
createListObjectsRequest(key, "/"),
|
createListObjectsRequest(key, "/"),
|
||||||
filter,
|
filter,
|
||||||
acceptor,
|
acceptor,
|
||||||
cachedFileStatusIterator));
|
cachedFileStatusIterator)),
|
||||||
}
|
tombstones);
|
||||||
});
|
|
||||||
return toLocatedFileStatusIterator(iterator);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -111,6 +111,63 @@ public void setup() throws Exception {
|
|||||||
skipDuringFaultInjection(fs);
|
skipDuringFaultInjection(fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCostOfLocatedFileStatusOnFile() throws Throwable {
|
||||||
|
describe("performing listLocatedStatus on a file");
|
||||||
|
Path file = path(getMethodName() + ".txt");
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
touch(fs, file);
|
||||||
|
resetMetricDiffs();
|
||||||
|
fs.listLocatedStatus(file);
|
||||||
|
if (!fs.hasMetadataStore()) {
|
||||||
|
// Unguarded FS.
|
||||||
|
metadataRequests.assertDiffEquals(1);
|
||||||
|
}
|
||||||
|
listRequests.assertDiffEquals(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable {
|
||||||
|
describe("performing listLocatedStatus on an empty dir");
|
||||||
|
Path dir = path(getMethodName());
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
fs.mkdirs(dir);
|
||||||
|
resetMetricDiffs();
|
||||||
|
fs.listLocatedStatus(dir);
|
||||||
|
if (!fs.hasMetadataStore()) {
|
||||||
|
// Unguarded FS.
|
||||||
|
verifyOperationCount(2, 1);
|
||||||
|
} else {
|
||||||
|
if (fs.allowAuthoritative(dir)) {
|
||||||
|
verifyOperationCount(0, 0);
|
||||||
|
} else {
|
||||||
|
verifyOperationCount(0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
|
||||||
|
describe("performing listLocatedStatus on a non empty dir");
|
||||||
|
Path dir = path(getMethodName() + "dir");
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
fs.mkdirs(dir);
|
||||||
|
Path file = new Path(dir, "file.txt");
|
||||||
|
touch(fs, file);
|
||||||
|
resetMetricDiffs();
|
||||||
|
fs.listLocatedStatus(dir);
|
||||||
|
if (!fs.hasMetadataStore()) {
|
||||||
|
// Unguarded FS.
|
||||||
|
verifyOperationCount(0, 1);
|
||||||
|
} else {
|
||||||
|
if(fs.allowAuthoritative(dir)) {
|
||||||
|
verifyOperationCount(0, 0);
|
||||||
|
} else {
|
||||||
|
verifyOperationCount(0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCostOfGetFileStatusOnFile() throws Throwable {
|
public void testCostOfGetFileStatusOnFile() throws Throwable {
|
||||||
describe("performing getFileStatus on a file");
|
describe("performing getFileStatus on a file");
|
||||||
|
Loading…
Reference in New Issue
Block a user