HADOOP-16711.

This adds a new option fs.s3a.bucket.probe, range (0-2) to
control which probe for a bucket existence to perform on startup.

0: no checks
1: v1 check (as has been performed until now)
2: v2 bucket check, which also includes a permission check. Default.

When set to 0, bucket existence checks won't be done
during initialization thus making it faster.
When the bucket is not available in S3,
or if fs.s3a.endpoint points to the wrong instance of a private S3 store,
subsequent calls like listing, read, write etc. will fail with
an UnknownStoreException.

Contributed by:
  * Mukund Thakur (main patch and tests)
  * Rajesh Balamohan (v0 list and performance tests)
  * lqjacklee (HADOOP-15990/v2 list)
  * Steve Loughran (UnknownStoreException support)

       modified:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
       modified:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
       modified:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java
       modified:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
       new file:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java
       new file:   hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java
       modified:   hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
       modified:   hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
       modified:   hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
       modified:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
       new file:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
       modified:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java
       modified:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
       modified:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
       modified:   hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
       modified:   hadoop-tools/hadoop-aws/src/test/resources/core-site.xml

Change-Id: Ic174f803e655af172d81c1274ed92b51bdceb384
This commit is contained in:
Mukund Thakur 2020-02-21 13:43:39 +00:00 committed by Steve Loughran
parent e3bba5fa22
commit e77767bb1e
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
16 changed files with 484 additions and 28 deletions

View File

@ -481,6 +481,20 @@ private Constants() {
"fs.s3a.metadatastore.authoritative"; "fs.s3a.metadatastore.authoritative";
public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false; public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false;
/**
 * Configuration option selecting which bucket existence probe the client
 * performs; it is read during
 * {@code S3AFileSystem.initialize(URI, Configuration)}.
 * 0 skips the probe, 1 uses the v1 check and 2 uses the v2 check.
 * Negative values are rejected; values above 2 log a warning and
 * fall back to the v2 check.
 * Value: {@value}
 */
public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe";
/**
 * Default value of the bucket probe option: the existence of the bucket
 * is validated using {@code S3AFileSystem.verifyBucketExistsV2()}.
 * Value: {@value}
 */
public static final int S3A_BUCKET_PROBE_DEFAULT = 2;
/** /**
* How long a directory listing in the MS is considered as authoritative. * How long a directory listing in the MS is considered as authoritative.
*/ */

View File

@ -173,6 +173,7 @@
import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding; import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
@ -392,9 +393,7 @@ public void initialize(URI name, Configuration originalConf)
initCannedAcls(conf); initCannedAcls(conf);
// This initiates a probe against S3 for the bucket existing. // This initiates a probe against S3 for the bucket existing.
// It is where all network and authentication configuration issues doBucketProbing();
// surface, and is potentially slow.
verifyBucketExists();
inputPolicy = S3AInputPolicy.getPolicy( inputPolicy = S3AInputPolicy.getPolicy(
conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL)); conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
@ -463,6 +462,41 @@ public void initialize(URI name, Configuration originalConf)
} }
/**
 * Run the bucket existence probe selected by
 * {@link Constants#S3A_BUCKET_PROBE}.
 * <ul>
 *   <li>0: no probe is made, which speeds up S3AFileSystem
 *   initialization.</li>
 *   <li>1: the v1 existence check is made; this is potentially slow.</li>
 *   <li>2: the v2 existence check is made; this is potentially slow.</li>
 *   <li>3 or higher: a warning is logged and the v2 check is used.</li>
 * </ul>
 * A negative value is rejected with an {@code IllegalArgumentException}.
 * @throws UnknownStoreException the bucket is absent
 * @throws IOException any other problem talking to S3
 */
@Retries.RetryTranslated
private void doBucketProbing() throws IOException {
  final int probeLevel = getConf()
      .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
  Preconditions.checkArgument(probeLevel >= 0,
      "Value of " + S3A_BUCKET_PROBE + " should be >= 0");
  if (probeLevel == 0) {
    // probing disabled: a missing bucket surfaces on the first operation.
    LOG.debug("skipping check for bucket existence");
    return;
  }
  if (probeLevel == 1) {
    verifyBucketExists();
    return;
  }
  if (probeLevel > 2) {
    // we have no idea what this is, assume it is from a later release.
    LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2",
        S3A_BUCKET_PROBE, probeLevel);
  }
  verifyBucketExistsV2();
}
/** /**
* Initialize the thread pool. * Initialize the thread pool.
* This must be re-invoked after replacing the S3Client during test * This must be re-invoked after replacing the S3Client during test
@ -510,15 +544,31 @@ protected static S3AStorageStatistics createStorageStatistics() {
* Verify that the bucket exists. This does not check permissions, * Verify that the bucket exists. This does not check permissions,
* not even read access. * not even read access.
* Retry policy: retrying, translated. * Retry policy: retrying, translated.
* @throws FileNotFoundException the bucket is absent * @throws UnknownStoreException the bucket is absent
* @throws IOException any other problem talking to S3 * @throws IOException any other problem talking to S3
*/ */
@Retries.RetryTranslated @Retries.RetryTranslated
protected void verifyBucketExists() protected void verifyBucketExists()
throws FileNotFoundException, IOException { throws UnknownStoreException, IOException {
if (!invoker.retry("doesBucketExist", bucket, true, if (!invoker.retry("doesBucketExist", bucket, true,
() -> s3.doesBucketExist(bucket))) { () -> s3.doesBucketExist(bucket))) {
throw new FileNotFoundException("Bucket " + bucket + " does not exist"); throw new UnknownStoreException("Bucket " + bucket + " does not exist");
}
}
/**
* Verify that the bucket exists. This will correctly throw an exception
* when credentials are invalid.
* Retry policy: retrying, translated.
* @throws UnknownStoreException the bucket is absent
* @throws IOException any other problem talking to S3
*/
@Retries.RetryTranslated
protected void verifyBucketExistsV2()
throws UnknownStoreException, IOException {
if (!invoker.retry("doesBucketExistV2", bucket, true,
() -> s3.doesBucketExistV2(bucket))) {
throw new UnknownStoreException("Bucket " + bucket + " does not exist");
} }
} }
@ -2891,7 +2941,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
} catch (AmazonServiceException e) { } catch (AmazonServiceException e) {
// if the response is a 404 error, it just means that there is // if the response is a 404 error, it just means that there is
// no file at that path...the remaining checks will be needed. // no file at that path...the remaining checks will be needed.
if (e.getStatusCode() != SC_404) { if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", path, e); throw translateException("getFileStatus", path, e);
} }
} catch (AmazonClientException e) { } catch (AmazonClientException e) {
@ -2923,7 +2973,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
meta.getVersionId()); meta.getVersionId());
} }
} catch (AmazonServiceException e) { } catch (AmazonServiceException e) {
if (e.getStatusCode() != SC_404) { if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", newKey, e); throw translateException("getFileStatus", newKey, e);
} }
} catch (AmazonClientException e) { } catch (AmazonClientException e) {
@ -2962,7 +3012,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
return new S3AFileStatus(Tristate.TRUE, path, username); return new S3AFileStatus(Tristate.TRUE, path, username);
} }
} catch (AmazonServiceException e) { } catch (AmazonServiceException e) {
if (e.getStatusCode() != SC_404) { if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", path, e); throw translateException("getFileStatus", path, e);
} }
} catch (AmazonClientException e) { } catch (AmazonClientException e) {

View File

@ -188,6 +188,7 @@ protected Map<Class<? extends Exception>, RetryPolicy> createExceptionMap() {
policyMap.put(AccessDeniedException.class, fail); policyMap.put(AccessDeniedException.class, fail);
policyMap.put(NoAuthWithAWSException.class, fail); policyMap.put(NoAuthWithAWSException.class, fail);
policyMap.put(FileNotFoundException.class, fail); policyMap.put(FileNotFoundException.class, fail);
policyMap.put(UnknownStoreException.class, fail);
policyMap.put(InvalidRequestException.class, fail); policyMap.put(InvalidRequestException.class, fail);
// metadata stores should do retries internally when it makes sense // metadata stores should do retries internally when it makes sense

View File

@ -86,6 +86,7 @@
import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException; import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
@ -249,6 +250,18 @@ public static IOException translateException(@Nullable String operation,
// the object isn't there // the object isn't there
case 404: case 404:
if (isUnknownBucket(ase)) {
// this is a missing bucket
ioe = new UnknownStoreException(path, ase);
} else {
// a normal unknown object
ioe = new FileNotFoundException(message);
ioe.initCause(ase);
}
break;
// this also surfaces sometimes and is considered to
// be ~ a not found exception.
case 410: case 410:
ioe = new FileNotFoundException(message); ioe = new FileNotFoundException(message);
ioe.initCause(ase); ioe.initCause(ase);

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
 * Raised when the bucket, or another AWS resource, is unknown.
 *
 * This is deliberately not a subclass of FileNotFoundException:
 * too much code catches an FNFE and infers that a file is missing.
 * A missing bucket is far more significant and generally should
 * not be ignored.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class UnknownStoreException extends IOException {

  /**
   * Create an exception with a message and no cause.
   * @param message message
   */
  public UnknownStoreException(final String message) {
    super(message);
  }

  /**
   * Create an exception with a message and an optional cause.
   * The cause is only attached when it is non-null.
   * @param message message
   * @param cause cause (may be null)
   */
  public UnknownStoreException(final String message, Throwable cause) {
    super(message);
    if (cause == null) {
      return;
    }
    initCause(cause);
  }
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import com.amazonaws.AmazonServiceException;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
/**
 * Translate from AWS SDK-wrapped exceptions into IOExceptions with
 * as much information as possible.
 * The core of the translation logic is in S3AUtils, in
 * {@code translateException} and nearby; that has grown to be
 * a large and complex piece of logic, as it ties in with retry/recovery
 * policies, throttling, etc.
 *
 * This class is where future expansion of that code should go, so that
 * there is an isolated place for all the changes.
 * The existing code has been left in S3AUtils to avoid cherry-picking
 * problems on backports.
 */
public class ErrorTranslation {

  /**
   * Private constructor for utility class.
   */
  private ErrorTranslation() {
  }

  /**
   * Does this exception indicate that the AWS Bucket was unknown?
   * The error code is only examined when the status code is a 404.
   * @param e exception.
   * @return true if the status code and error code mean that the
   * remote bucket is unknown.
   */
  public static boolean isUnknownBucket(AmazonServiceException e) {
    if (e.getStatusCode() != SC_404) {
      return false;
    }
    final String errorCode = e.getErrorCode();
    return AwsErrorCodes.E_NO_SUCH_BUCKET.equals(errorCode);
  }

  /**
   * AWS error codes which are explicitly recognized and processed
   * specially; kept in their own class for isolation.
   */
  public static final class AwsErrorCodes {

    /**
     * The AWS S3 error code used to recognize when a 404 means the bucket is
     * unknown.
     */
    public static final String E_NO_SUCH_BUCKET = "NoSuchBucket";

    /** private constructor. */
    private AwsErrorCodes() {
    }
  }
}

View File

@ -1000,6 +1000,26 @@ options are covered in [Testing](./testing.md).
converged to Integer.MAX_VALUE milliseconds converged to Integer.MAX_VALUE milliseconds
</description> </description>
</property> </property>
<property>
<name>fs.s3a.bucket.probe</name>
<value>2</value>
<description>
The value can be 0, 1 or 2 (default).
When set to 0, bucket existence checks won't be done
during initialization thus making it faster.
Though it should be noted that when the bucket is not available in S3,
or if fs.s3a.endpoint points to the wrong instance of a private S3 store,
subsequent calls like listing, read, write etc. will fail with
an UnknownStoreException.
When set to 1, the bucket existence check will be done using the
V1 API of the S3 protocol which doesn't verify the client's permissions
to list or read data in the bucket.
When set to 2, the bucket existence check will be done using the
V2 API of the S3 protocol which does verify that the
client has permission to read the bucket.
</description>
</property>
``` ```
## <a name="retry_and_recovery"></a>Retry and Recovery ## <a name="retry_and_recovery"></a>Retry and Recovery

View File

@ -608,3 +608,19 @@ with HADOOP-15669.
Other options may be added to `fs.s3a.ssl.channel.mode` in the future as Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
further SSL optimizations are made. further SSL optimizations are made.
## Tuning FileSystem Initialization.
When an S3A Filesystem instance is created and initialized, the client
checks if the bucket provided is valid. This can be slow.
You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows:
```xml
<property>
<name>fs.s3a.bucket.probe</name>
<value>0</value>
</property>
```
Note: if the bucket does not exist, this issue will surface when operations are performed
on the filesystem; you will see `UnknownStoreException` stack traces.

View File

@ -1203,29 +1203,44 @@ a new one than read to the end of a large file.
Note: the threshold when data is read rather than the stream aborted can be tuned Note: the threshold when data is read rather than the stream aborted can be tuned
by `fs.s3a.readahead.range`; seek policy in `fs.s3a.experimental.input.fadvise`. by `fs.s3a.readahead.range`; seek policy in `fs.s3a.experimental.input.fadvise`.
### <a name="no_such_bucket"></a> `FileNotFoundException` Bucket does not exist. ### <a name="no_such_bucket"></a> `UnknownStoreException` Bucket does not exist.
The bucket does not exist. The bucket does not exist.
``` ```
java.io.FileNotFoundException: Bucket stevel45r56666 does not exist org.apache.hadoop.fs.s3a.UnknownStoreException:
at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:361) Bucket random-bucket-33013fb8-f7f7-4edb-9c26-16a6ed019184 does not exist
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:293) at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:537)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3288) at org.apache.hadoop.fs.s3a.S3AFileSystem.doBucketProbing(S3AFileSystem.java:471)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:123) at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:387)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3337) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3422)
at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3311) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:502)
at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:529)
at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo.run(S3GuardTool.java:997)
at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:309)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:1218)
at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.main(S3GuardTool.java:1227)
``` ```
Check the URI is correct, and that the bucket actually exists.
Check the URI. If using a third-party store, verify that you've configured If using a third-party store, verify that you've configured
the client to talk to the specific server in `fs.s3a.endpoint`. the client to talk to the specific server in `fs.s3a.endpoint`.
Forgetting to update this value and asking the AWS S3 endpoint
for a bucket is not an unusual occurrence.
This can surface during filesystem API calls if the bucket is deleted while you are using it,
or the startup check for bucket existence has been disabled by setting `fs.s3a.bucket.probe` to 0.
```
org.apache.hadoop.fs.s3a.UnknownStoreException: s3a://random-bucket-7d9217b0-b426-4344-82ea-25d6cbb316f1/
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:254)
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:167)
at org.apache.hadoop.fs.s3a.S3AFileSystem.innerListFiles(S3AFileSystem.java:4149)
at org.apache.hadoop.fs.s3a.S3AFileSystem.listFiles(S3AFileSystem.java:3983)
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
The specified bucket does not exist
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchBucket
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1712)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367)
```
## Other Issues ## Other Issues

View File

@ -75,6 +75,7 @@ public Configuration createConfiguration() {
conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true); conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true);
// use minimum multipart size for faster triggering // use minimum multipart size for faster triggering
conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE); conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE);
conf.setInt(Constants.S3A_BUCKET_PROBE, 1);
return conf; return conf;
} }

View File

@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import java.net.URI;
import java.util.UUID;
import java.util.concurrent.Callable;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.LambdaTestUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL;
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Class to test bucket existence APIs.
*/
public class ITestS3ABucketExistence extends AbstractS3ATestBase {
private FileSystem fs;
private final String randomBucket =
"random-bucket-" + UUID.randomUUID().toString();
private final URI uri = URI.create(FS_S3A + "://" + randomBucket + "/");
@SuppressWarnings("deprecation")
@Test
public void testNoBucketProbing() throws Exception {
describe("Disable init-time probes and expect FS operations to fail");
Configuration conf = createConfigurationWithProbe(0);
// metastores can bypass S3 checks, so disable S3Guard, always
conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
fs = FileSystem.get(uri, conf);
Path root = new Path(uri);
expectUnknownStore(
() -> fs.getFileStatus(root));
expectUnknownStore(
() -> fs.listStatus(root));
Path src = new Path(root, "testfile");
Path dest = new Path(root, "dst");
expectUnknownStore(
() -> fs.getFileStatus(src));
// the exception must not be caught and marked down to an FNFE
expectUnknownStore(() -> fs.exists(src));
expectUnknownStore(() -> fs.isFile(src));
expectUnknownStore(() -> fs.isDirectory(src));
expectUnknownStore(() -> fs.mkdirs(src));
expectUnknownStore(() -> fs.delete(src));
expectUnknownStore(() -> fs.rename(src, dest));
byte[] data = dataset(1024, 'a', 'z');
expectUnknownStore(
() -> writeDataset(fs, src, data, data.length, 1024 * 1024, true));
}
/**
* Expect an operation to raise an UnknownStoreException.
* @param eval closure
* @param <T> return type of closure
* @throws Exception anything else raised.
*/
public static <T> void expectUnknownStore(
Callable<T> eval)
throws Exception {
intercept(UnknownStoreException.class, eval);
}
/**
* Expect an operation to raise an UnknownStoreException.
* @param eval closure
* @throws Exception anything else raised.
*/
public static void expectUnknownStore(
LambdaTestUtils.VoidCallable eval)
throws Exception {
intercept(UnknownStoreException.class, eval);
}
/**
* Create a new configuration with the given bucket probe;
* we also disable FS caching.
* @param probe value to use as the bucket probe.
* @return a configuration.
*/
private Configuration createConfigurationWithProbe(final int probe) {
Configuration conf = new Configuration(getFileSystem().getConf());
S3ATestUtils.disableFilesystemCaching(conf);
conf.setInt(S3A_BUCKET_PROBE, probe);
return conf;
}
@Test
public void testBucketProbingV1() throws Exception {
describe("Test the V1 bucket probe");
Configuration configuration = createConfigurationWithProbe(1);
expectUnknownStore(
() -> FileSystem.get(uri, configuration));
}
@Test
public void testBucketProbingV2() throws Exception {
describe("Test the V2 bucket probe");
Configuration configuration = createConfigurationWithProbe(2);
expectUnknownStore(
() -> FileSystem.get(uri, configuration));
/*
* Bucket probing should also be done when value of
* S3A_BUCKET_PROBE is greater than 2.
*/
configuration.setInt(S3A_BUCKET_PROBE, 3);
expectUnknownStore(
() -> FileSystem.get(uri, configuration));
}
@Test
public void testBucketProbingParameterValidation() throws Exception {
describe("Test bucket probe parameter %s validation", S3A_BUCKET_PROBE);
Configuration configuration = createConfigurationWithProbe(-1);
intercept(IllegalArgumentException.class,
"Value of " + S3A_BUCKET_PROBE + " should be >= 0",
"Should throw IllegalArgumentException",
() -> FileSystem.get(uri, configuration));
}
@Override
protected Configuration getConfiguration() {
Configuration configuration = super.getConfiguration();
S3ATestUtils.disableFilesystemCaching(configuration);
return configuration;
}
@Override
public void teardown() throws Exception {
IOUtils.cleanupWithLogger(getLogger(), fs);
super.teardown();
}
}

View File

@ -41,6 +41,7 @@ public AmazonS3 createS3Client(URI name,
final String userAgentSuffix) { final String userAgentSuffix) {
AmazonS3 s3 = mock(AmazonS3.class); AmazonS3 s3 = mock(AmazonS3.class);
when(s3.doesBucketExist(bucket)).thenReturn(true); when(s3.doesBucketExist(bucket)).thenReturn(true);
when(s3.doesBucketExistV2(bucket)).thenReturn(true);
// this listing is used in startup if purging is enabled, so // this listing is used in startup if purging is enabled, so
// return a stub value // return a stub value
MultipartUploadListing noUploads = new MultipartUploadListing(); MultipartUploadListing noUploads = new MultipartUploadListing();

View File

@ -21,6 +21,7 @@
import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.io.EOFException; import java.io.EOFException;
@ -39,6 +40,8 @@
import org.junit.Test; import org.junit.Test;
import org.apache.hadoop.fs.s3a.impl.ErrorTranslation;
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
/** /**
@ -98,9 +101,24 @@ public void test403isNotPermittedFound() throws Exception {
verifyTranslated(403, AccessDeniedException.class); verifyTranslated(403, AccessDeniedException.class);
} }
/**
* 404 defaults to FileNotFound.
*/
@Test @Test
public void test404isNotFound() throws Exception { public void test404isNotFound() throws Exception {
verifyTranslated(404, FileNotFoundException.class); verifyTranslated(SC_404, FileNotFoundException.class);
}
/**
* 404 + NoSuchBucket == Unknown bucket.
*/
@Test
public void testUnknownBucketException() throws Exception {
AmazonS3Exception ex404 = createS3Exception(SC_404);
ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET);
verifyTranslated(
UnknownStoreException.class,
ex404);
} }
@Test @Test

View File

@ -21,7 +21,6 @@
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.URI; import java.net.URI;
@ -36,6 +35,7 @@
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.UnknownStoreException;
import org.apache.hadoop.util.StopWatch; import org.apache.hadoop.util.StopWatch;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -506,7 +506,7 @@ public void testToolsNoBucket() throws Throwable {
cmdR.getName(), cmdR.getName(),
S3A_THIS_BUCKET_DOES_NOT_EXIST S3A_THIS_BUCKET_DOES_NOT_EXIST
}; };
intercept(FileNotFoundException.class, intercept(UnknownStoreException.class,
() -> cmdR.run(argsR)); () -> cmdR.run(argsR));
} }
} }

View File

@ -41,6 +41,7 @@
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.UnknownStoreException;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init;
import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ExitUtil;
@ -319,7 +320,7 @@ public void testCLIFsckWithParamParentOfRoot() throws Exception {
@Test @Test
public void testCLIFsckFailInitializeFs() throws Exception { public void testCLIFsckFailInitializeFs() throws Exception {
intercept(FileNotFoundException.class, "does not exist", intercept(UnknownStoreException.class,
() -> run(S3GuardTool.Fsck.NAME, "-check", () -> run(S3GuardTool.Fsck.NAME, "-check",
"s3a://this-bucket-does-not-exist-" + UUID.randomUUID())); "s3a://this-bucket-does-not-exist-" + UUID.randomUUID()));
} }

View File

@ -51,6 +51,12 @@
managed by s3guard</description> managed by s3guard</description>
</property> </property>
<property>
<name>fs.s3a.bucket.landsat-pds.probe</name>
<value>0</value>
<description>Let's postpone existence checks to the first IO operation </description>
</property>
<!-- Convenience definitions. --> <!-- Convenience definitions. -->
<property> <property>
<name>s3guard.null</name> <name>s3guard.null</name>