From e77767bb1e8dfb8b0bd7af4664c900f7238b4fa0 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Fri, 21 Feb 2020 13:43:39 +0000 Subject: [PATCH] HADOOP-16711. This adds a new option fs.s3a.bucket.probe, range (0-2) to control which probe for a bucket existence to perform on startup. 0: no checks 1: v1 check (as has been performed until now) 2: v2 bucket check, which also includes a permission check. Default. When set to 0, bucket existence checks won't be done during initialization thus making it faster. When the bucket is not available in S3, or if fs.s3a.endpoint points to the wrong instance of a private S3 store consecutive calls like listing, read, write etc. will fail with an UnknownStoreException. Contributed by: * Mukund Thakur (main patch and tests) * Rajesh Balamohan (v0 list and performance tests) * lqjacklee (HADOOP-15990/v2 list) * Steve Loughran (UnknownStoreException support) modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java new file: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java new file: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java new file: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java modified: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java modified: hadoop-tools/hadoop-aws/src/test/resources/core-site.xml Change-Id: Ic174f803e655af172d81c1274ed92b51bdceb384 --- .../org/apache/hadoop/fs/s3a/Constants.java | 14 ++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 68 ++++++- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 1 + .../org/apache/hadoop/fs/s3a/S3AUtils.java | 13 ++ .../hadoop/fs/s3a/UnknownStoreException.java | 57 ++++++ .../hadoop/fs/s3a/impl/ErrorTranslation.java | 73 ++++++++ .../site/markdown/tools/hadoop-aws/index.md | 20 +++ .../markdown/tools/hadoop-aws/performance.md | 16 ++ .../tools/hadoop-aws/troubleshooting_s3a.md | 45 +++-- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 1 + .../fs/s3a/ITestS3ABucketExistence.java | 170 ++++++++++++++++++ .../hadoop/fs/s3a/MockS3ClientFactory.java | 1 + .../fs/s3a/TestS3AExceptionTranslation.java | 20 ++- .../s3guard/AbstractS3GuardToolTestBase.java | 4 +- .../s3a/s3guard/ITestS3GuardToolDynamoDB.java | 3 +- .../src/test/resources/core-site.xml | 6 + 16 files changed, 484 insertions(+), 28 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 561ab4a84a..0ca4aa01a7 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -481,6 +481,20 @@ private Constants() { "fs.s3a.metadatastore.authoritative"; public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false; + /** + * Bucket validation parameter which can be set by client. This will be + * used in {@code S3AFileSystem.initialize(URI, Configuration)}. + * Value: {@value} + */ + public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe"; + + /** + * Default value of bucket validation parameter. An existence of bucket + * will be validated using {@code S3AFileSystem.verifyBucketExistsV2()}. + * Value: {@value} + */ + public static final int S3A_BUCKET_PROBE_DEFAULT = 2; + /** * How long a directory listing in the MS is considered as authoritative. */ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index ce7729fa39..1e5175c154 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -173,6 +173,7 @@ import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions; +import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; @@ -392,9 +393,7 @@ public void initialize(URI name, Configuration originalConf) initCannedAcls(conf); // This initiates a probe against S3 for the bucket 
existing. - // It is where all network and authentication configuration issues - // surface, and is potentially slow. - verifyBucketExists(); + doBucketProbing(); inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL)); @@ -463,6 +462,41 @@ public void initialize(URI name, Configuration originalConf) } + /** + * Test bucket existence in S3. + * When the value of {@link Constants#S3A_BUCKET_PROBE} is set to 0, + * bucket existence check is not done to improve performance of + * S3AFileSystem initialization. When set to 1 or 2, bucket existence check + * will be performed which is potentially slow. + * If 3 or higher: warn and use the v2 check. + * @throws UnknownStoreException the bucket is absent + * @throws IOException any other problem talking to S3 + */ + @Retries.RetryTranslated + private void doBucketProbing() throws IOException { + int bucketProbe = getConf() + .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT); + Preconditions.checkArgument(bucketProbe >= 0, + "Value of " + S3A_BUCKET_PROBE + " should be >= 0"); + switch (bucketProbe) { + case 0: + LOG.debug("skipping check for bucket existence"); + break; + case 1: + verifyBucketExists(); + break; + case 2: + verifyBucketExistsV2(); + break; + default: + // we have no idea what this is, assume it is from a later release. + LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2", + S3A_BUCKET_PROBE, bucketProbe); + verifyBucketExistsV2(); + break; + } + } + /** * Initialize the thread pool. * This must be re-invoked after replacing the S3Client during test @@ -510,15 +544,31 @@ protected static S3AStorageStatistics createStorageStatistics() { * Verify that the bucket exists. This does not check permissions, * not even read access. * Retry policy: retrying, translated. 
- * @throws FileNotFoundException the bucket is absent + * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ @Retries.RetryTranslated protected void verifyBucketExists() - throws FileNotFoundException, IOException { + throws UnknownStoreException, IOException { if (!invoker.retry("doesBucketExist", bucket, true, () -> s3.doesBucketExist(bucket))) { - throw new FileNotFoundException("Bucket " + bucket + " does not exist"); + throw new UnknownStoreException("Bucket " + bucket + " does not exist"); + } + } + + /** + * Verify that the bucket exists. This will correctly throw an exception + * when credentials are invalid. + * Retry policy: retrying, translated. + * @throws UnknownStoreException the bucket is absent + * @throws IOException any other problem talking to S3 + */ + @Retries.RetryTranslated + protected void verifyBucketExistsV2() + throws UnknownStoreException, IOException { + if (!invoker.retry("doesBucketExistV2", bucket, true, + () -> s3.doesBucketExistV2(bucket))) { + throw new UnknownStoreException("Bucket " + bucket + " does not exist"); } } @@ -2891,7 +2941,7 @@ S3AFileStatus s3GetFileStatus(final Path path, } catch (AmazonServiceException e) { // if the response is a 404 error, it just means that there is // no file at that path...the remaining checks will be needed. 
- if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { @@ -2923,7 +2973,7 @@ S3AFileStatus s3GetFileStatus(final Path path, meta.getVersionId()); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", newKey, e); } } catch (AmazonClientException e) { @@ -2962,7 +3012,7 @@ S3AFileStatus s3GetFileStatus(final Path path, return new S3AFileStatus(Tristate.TRUE, path, username); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 09e9c993b0..d2954b3a92 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -188,6 +188,7 @@ protected Map, RetryPolicy> createExceptionMap() { policyMap.put(AccessDeniedException.class, fail); policyMap.put(NoAuthWithAWSException.class, fail); policyMap.put(FileNotFoundException.class, fail); + policyMap.put(UnknownStoreException.class, fail); policyMap.put(InvalidRequestException.class, fail); // metadata stores should do retries internally when it makes sense diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index e2a488e8fe..3775848fc8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -86,6 +86,7 @@ import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; @@ -249,6 +250,18 @@ public static IOException translateException(@Nullable String operation, // the object isn't there case 404: + if (isUnknownBucket(ase)) { + // this is a missing bucket + ioe = new UnknownStoreException(path, ase); + } else { + // a normal unknown object + ioe = new FileNotFoundException(message); + ioe.initCause(ase); + } + break; + + // this also surfaces sometimes and is considered to + // be ~ a not found exception. case 410: ioe = new FileNotFoundException(message); ioe.initCause(ase); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java new file mode 100644 index 0000000000..0129005e06 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The bucket or other AWS resource is unknown. + * + * Why not a subclass of FileNotFoundException? + * There's too much code which caches an FNFE and infers that the file isn't + * there - a missing bucket is far more significant and generally should + * not be ignored. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnknownStoreException extends IOException { + + /** + * Constructor. + * @param message message + */ + public UnknownStoreException(final String message) { + this(message, null); + } + + /** + * Constructor. + * @param message message + * @param cause cause (may be null) + */ + public UnknownStoreException(final String message, Throwable cause) { + super(message); + if (cause != null) { + initCause(cause); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java new file mode 100644 index 0000000000..d1baf3c898 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import com.amazonaws.AmazonServiceException; + +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; + +/** + * Translate from AWS SDK-wrapped exceptions into IOExceptions with + * as much information as possible. + * The core of the translation logic is in S3AUtils, in + * {@code translateException} and nearby; that has grown to be + * a large and complex piece of logic, as it ties in with retry/recovery + * policies, throttling, etc. + * + * This class is where future expansion of that code should go so that we have + * an isolated place for all the changes. + * The existing code has been left in S3AUtils to avoid cherry-picking + * problems on backports. + */ +public class ErrorTranslation { + + /** + * Private constructor for utility class. + */ + private ErrorTranslation() { + } + + /** + * Does this exception indicate that the AWS Bucket was unknown. + * @param e exception. + * @return true if the status code and error code mean that the + * remote bucket is unknown. + */ + public static boolean isUnknownBucket(AmazonServiceException e) { + return e.getStatusCode() == SC_404 + && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode()); + } + + /** + * AWS error codes explicitly recognized and processed specially; + * kept in their own class for isolation. 
+ */ + public static final class AwsErrorCodes { + + /** + * The AWS S3 error code used to recognize when a 404 means the bucket is + * unknown. + */ + public static final String E_NO_SUCH_BUCKET = "NoSuchBucket"; + + /** private constructor. */ + private AwsErrorCodes() { + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 9697e7ac40..aec778e96f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -1000,6 +1000,26 @@ options are covered in [Testing](./testing.md). converged to Integer.MAX_VALUE milliseconds + + + fs.s3a.bucket.probe + 2 + + The value can be 0, 1 or 2 (default). + When set to 0, bucket existence checks won't be done + during initialization thus making it faster. + Though it should be noted that when the bucket is not available in S3, + or if fs.s3a.endpoint points to the wrong instance of a private S3 store + consecutive calls like listing, read, write etc. will fail with + an UnknownStoreException. + When set to 1, the bucket existence check will be done using the + V1 API of the S3 protocol which doesn't verify the client's permissions + to list or read data in the bucket. + When set to 2, the bucket existence check will be done using the + V2 API of the S3 protocol which does verify that the + client has permission to read the bucket. + + ``` ## Retry and Recovery diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 5543263471..6ca6060810 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -608,3 +608,19 @@ with HADOOP-15669. 
Other options may be added to `fs.s3a.ssl.channel.mode` in the future as further SSL optimizations are made. + +## Tuning FileSystem Initialization. + +When an S3A Filesystem instance is created and initialized, the client +checks if the bucket provided is valid. This can be slow. +You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows: + +```xml + + fs.s3a.bucket.probe + 0 + +``` + +Note: if the bucket does not exist, this issue will surface when operations are performed +on the filesystem; you will see `UnknownStoreException` stack traces. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 5408c44aea..47bc81e0ec 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -1203,29 +1203,44 @@ a new one than read to the end of a large file. Note: the threshold when data is read rather than the stream aborted can be tuned by `fs.s3a.readahead.range`; seek policy in `fs.s3a.experimental.input.fadvise`. -### `FileNotFoundException` Bucket does not exist. +### `UnknownStoreException` Bucket does not exist. The bucket does not exist. 
``` -java.io.FileNotFoundException: Bucket stevel45r56666 does not exist - at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:361) - at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:293) - at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3288) - at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:123) - at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3337) - at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3311) - at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:529) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo.run(S3GuardTool.java:997) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:309) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:1218) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.main(S3GuardTool.java:1227) +org.apache.hadoop.fs.s3a.UnknownStoreException: + Bucket random-bucket-33013fb8-f7f7-4edb-9c26-16a6ed019184 does not exist + at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:537) + at org.apache.hadoop.fs.s3a.S3AFileSystem.doBucketProbing(S3AFileSystem.java:471) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:387) + at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3422) + at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:502) ``` +Check the URI is correct, and that the bucket actually exists. -Check the URI. If using a third-party store, verify that you've configured +If using a third-party store, verify that you've configured the client to talk to the specific server in `fs.s3a.endpoint`. +Forgetting to update this value and asking the AWS S3 endpoint +for a bucket is not an unusual occurrence. 
+ +This can surface during filesystem API calls if the bucket is deleted while you are using it, + -or the startup check for bucket existence has been disabled by setting `fs.s3a.bucket.probe` to 0. + +``` +org.apache.hadoop.fs.s3a.UnknownStoreException: s3a://random-bucket-7d9217b0-b426-4344-82ea-25d6cbb316f1/ + + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:254) + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:167) + at org.apache.hadoop.fs.s3a.S3AFileSystem.innerListFiles(S3AFileSystem.java:4149) + at org.apache.hadoop.fs.s3a.S3AFileSystem.listFiles(S3AFileSystem.java:3983) +Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: +The specified bucket does not exist + (Service: Amazon S3; Status Code: 404; Error Code: NoSuchBucket + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1712) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367) +``` + ## Other Issues diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index 886795a9d9..99bab73e71 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -75,6 +75,7 @@ public Configuration createConfiguration() { conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true); // use minimum multipart size for faster triggering conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE); + conf.setInt(Constants.S3A_BUCKET_PROBE, 1); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java new file mode 100644 index 0000000000..6be9003e4e --- 
/dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.net.URI; +import java.util.UUID; +import java.util.concurrent.Callable; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; +import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; +import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL; +import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Class to test bucket existence APIs. 
+ */ +public class ITestS3ABucketExistence extends AbstractS3ATestBase { + + private FileSystem fs; + + private final String randomBucket = + "random-bucket-" + UUID.randomUUID().toString(); + + private final URI uri = URI.create(FS_S3A + "://" + randomBucket + "/"); + + @SuppressWarnings("deprecation") + @Test + public void testNoBucketProbing() throws Exception { + describe("Disable init-time probes and expect FS operations to fail"); + Configuration conf = createConfigurationWithProbe(0); + // metastores can bypass S3 checks, so disable S3Guard, always + conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL); + + fs = FileSystem.get(uri, conf); + + Path root = new Path(uri); + + expectUnknownStore( + () -> fs.getFileStatus(root)); + + expectUnknownStore( + () -> fs.listStatus(root)); + + Path src = new Path(root, "testfile"); + Path dest = new Path(root, "dst"); + expectUnknownStore( + () -> fs.getFileStatus(src)); + + // the exception must not be caught and marked down to an FNFE + expectUnknownStore(() -> fs.exists(src)); + expectUnknownStore(() -> fs.isFile(src)); + expectUnknownStore(() -> fs.isDirectory(src)); + expectUnknownStore(() -> fs.mkdirs(src)); + expectUnknownStore(() -> fs.delete(src)); + expectUnknownStore(() -> fs.rename(src, dest)); + + byte[] data = dataset(1024, 'a', 'z'); + expectUnknownStore( + () -> writeDataset(fs, src, data, data.length, 1024 * 1024, true)); + } + + /** + * Expect an operation to raise an UnknownStoreException. + * @param eval closure + * @param return type of closure + * @throws Exception anything else raised. + */ + public static void expectUnknownStore( + Callable eval) + throws Exception { + intercept(UnknownStoreException.class, eval); + } + + /** + * Expect an operation to raise an UnknownStoreException. + * @param eval closure + * @throws Exception anything else raised. 
+ */ + public static void expectUnknownStore( + LambdaTestUtils.VoidCallable eval) + throws Exception { + intercept(UnknownStoreException.class, eval); + } + + /** + * Create a new configuration with the given bucket probe; + * we also disable FS caching. + * @param probe value to use as the bucket probe. + * @return a configuration. + */ + private Configuration createConfigurationWithProbe(final int probe) { + Configuration conf = new Configuration(getFileSystem().getConf()); + S3ATestUtils.disableFilesystemCaching(conf); + conf.setInt(S3A_BUCKET_PROBE, probe); + return conf; + } + + @Test + public void testBucketProbingV1() throws Exception { + describe("Test the V1 bucket probe"); + Configuration configuration = createConfigurationWithProbe(1); + expectUnknownStore( + () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbingV2() throws Exception { + describe("Test the V2 bucket probe"); + Configuration configuration = createConfigurationWithProbe(2); + expectUnknownStore( + () -> FileSystem.get(uri, configuration)); + /* + * Bucket probing should also be done when value of + * S3A_BUCKET_PROBE is greater than 2. 
+ */ + configuration.setInt(S3A_BUCKET_PROBE, 3); + expectUnknownStore( + () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbingParameterValidation() throws Exception { + describe("Test bucket probe parameter %s validation", S3A_BUCKET_PROBE); + Configuration configuration = createConfigurationWithProbe(-1); + intercept(IllegalArgumentException.class, + "Value of " + S3A_BUCKET_PROBE + " should be >= 0", + "Should throw IllegalArgumentException", + () -> FileSystem.get(uri, configuration)); + } + + @Override + protected Configuration getConfiguration() { + Configuration configuration = super.getConfiguration(); + S3ATestUtils.disableFilesystemCaching(configuration); + return configuration; + } + + @Override + public void teardown() throws Exception { + IOUtils.cleanupWithLogger(getLogger(), fs); + super.teardown(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 2397f6cbaf..4644cf2476 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -41,6 +41,7 @@ public AmazonS3 createS3Client(URI name, final String userAgentSuffix) { AmazonS3 s3 = mock(AmazonS3.class); when(s3.doesBucketExist(bucket)).thenReturn(true); + when(s3.doesBucketExistV2(bucket)).thenReturn(true); // this listing is used in startup if purging is enabled, so // return a stub value MultipartUploadListing noUploads = new MultipartUploadListing(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java index 9b86595130..95bd7c21b8 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; import static org.junit.Assert.*; import java.io.EOFException; @@ -39,6 +40,8 @@ import org.junit.Test; +import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; + import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; /** @@ -98,9 +101,24 @@ public void test403isNotPermittedFound() throws Exception { verifyTranslated(403, AccessDeniedException.class); } + /** + * 404 defaults to FileNotFound. + */ @Test public void test404isNotFound() throws Exception { - verifyTranslated(404, FileNotFoundException.class); + verifyTranslated(SC_404, FileNotFoundException.class); + } + + /** + * 404 + NoSuchBucket == Unknown bucket. 
+ */ + @Test + public void testUnknownBucketException() throws Exception { + AmazonS3Exception ex404 = createS3Exception(SC_404); + ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); + verifyTranslated( + UnknownStoreException.class, + ex404); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java index 13d2646317..aa74c002d4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -21,7 +21,6 @@ import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; @@ -36,6 +35,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.util.StopWatch; import com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileSystem; @@ -506,7 +506,7 @@ public void testToolsNoBucket() throws Throwable { cmdR.getName(), S3A_THIS_BUCKET_DOES_NOT_EXIST }; - intercept(FileNotFoundException.class, + intercept(UnknownStoreException.class, () -> cmdR.run(argsR)); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java index 915f1cc190..ba93927e8d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java @@ -41,6 +41,7 @@ import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init; import org.apache.hadoop.util.ExitUtil; @@ -319,7 +320,7 @@ public void testCLIFsckWithParamParentOfRoot() throws Exception { @Test public void testCLIFsckFailInitializeFs() throws Exception { - intercept(FileNotFoundException.class, "does not exist", + intercept(UnknownStoreException.class, () -> run(S3GuardTool.Fsck.NAME, "-check", "s3a://this-bucket-does-not-exist-" + UUID.randomUUID())); } diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml index 5fd7c25f24..a90edbe24f 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -51,6 +51,12 @@ managed by s3guard + + fs.s3a.bucket.landsat-pds.probe + 0 + Let's postpone existence checks to the first IO operation + + s3guard.null