From 79fc58def303b7ef625b90fecab08ca97ea00d82 Mon Sep 17 00:00:00 2001 From: jimmy-zuber-amzn <67486813+jimmy-zuber-amzn@users.noreply.github.com> Date: Mon, 13 Jul 2020 11:07:48 -0700 Subject: [PATCH] HADOOP-17105. S3AFS - Do not attempt to resolve symlinks in globStatus (#2113) Contributed by Jimmy Zuber. Change-Id: I2f247c2d2ab4f38214073e55f5cfbaa15aeaeb11 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 10 +++-- .../fs/s3a/ITestS3AFileOperationCost.java | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e5b08f1bb9..f123f6b337 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3980,6 +3980,8 @@ public boolean isMagicCommitPath(Path path) { /** * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. * {@inheritDoc} */ @Override @@ -3988,9 +3990,9 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException { } /** - * Override superclass so as to disable symlink resolution and so avoid - * some calls to the FS which may have problems when the store is being - * inconsistent. + * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. * {@inheritDoc} */ @Override @@ -4002,7 +4004,7 @@ public FileStatus[] globStatus( return Globber.createGlobber(this) .withPathPattern(pathPattern) .withPathFiltern(filter) - .withResolveSymlinks(true) + .withResolveSymlinks(false) .build() .glob(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index b2b983c4d4..cd8d7d5d53 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -574,4 +574,48 @@ public void testCreateCost() throws Throwable { } } + + @Test + public void testCostOfGlobStatus() throws Throwable { + describe("Test globStatus has expected cost"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatus/nextFolder/"); + + // create a bunch of files + int filesToCreate = 10; + for (int i = 0; i < filesToCreate; i++) { + try (FSDataOutputStream out = fs.create(basePath.suffix("/" + i))) { + verifyOperationCount(1, 1); + } + } + + fs.globStatus(basePath.suffix("/*")); + // 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + verifyOperationCount(2, 2); + } + + @Test + public void testCostOfGlobStatusNoSymlinkResolution() throws Throwable { + describe("Test globStatus does not attempt to resolve symlinks"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatusNoSymlinkResolution/f/"); + + // create a single file, globStatus returning a single file on a pattern + // triggers attempts at symlinks resolution if configured + String fileName = "/notASymlinkDOntResolveMeLikeOne"; + try (FSDataOutputStream out = fs.create(basePath.suffix(fileName))) { + verifyOperationCount(1, 1); + } + + fs.globStatus(basePath.suffix("/*")); + // unguarded: 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + // no additional operations from symlink resolution + verifyOperationCount(2, 2); + } }