HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints (#6277)

Adds a new option `fs.s3a.endpoint.fips` to switch the SDK client to use
FIPS endpoints, as an alternative to explicitly declaring them.


* The option is available as a path capability for probes.
* SDK v2 itself doesn't know that some regions don't have FIPS endpoints
* The SDK only fails on the endpoint + FIPS combination with a retried exception; with this
  change the S3A client fails fast.
* Adds a new "connecting.md" doc; moves existing docs there and restructures.
* New Tests in ITestS3AEndpointRegion

bucket-info command support:

* added to list of path capabilities
* added -fips flag and test for explicit probe
* also now prints bucket region
* and removed some of the obsolete s3guard options
* updated docs

Contributed by Steve Loughran
Steve Loughran 2024-01-16 14:16:12 +00:00 committed by GitHub
parent 36198b5edf
commit d378853790
14 changed files with 688 additions and 306 deletions

View File

@ -1335,6 +1335,15 @@ private Constants() {
*/ */
public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; public static final String AWS_S3_DEFAULT_REGION = "us-east-2";
/**
* Is the endpoint a FIPS endpoint?
* Can be queried as a path capability.
* Value {@value}.
*/
public static final String FIPS_ENDPOINT = "fs.s3a.endpoint.fips";
public static final boolean ENDPOINT_FIPS_DEFAULT = false;
/** /**
* Require that all S3 access is made through Access Points. * Require that all S3 access is made through Access Points.
*/ */

View File

@ -22,6 +22,7 @@
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -54,6 +55,7 @@
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT;
@ -63,6 +65,7 @@
import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner; import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner;
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4;
import static org.apache.hadoop.util.Preconditions.checkArgument;
/** /**
@ -102,6 +105,13 @@ public class DefaultS3ClientFactory extends Configured
/** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */
private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG);
/**
* Error message when an endpoint is set with FIPS enabled: {@value}.
*/
@VisibleForTesting
public static final String ERROR_ENDPOINT_WITH_FIPS =
"An endpoint cannot set when " + FIPS_ENDPOINT + " is true";
@Override @Override
public S3Client createS3Client( public S3Client createS3Client(
final URI uri, final URI uri,
@ -248,6 +258,7 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration(
* @param conf conf configuration object * @param conf conf configuration object
* @param <BuilderT> S3 client builder type * @param <BuilderT> S3 client builder type
* @param <ClientT> S3 client type * @param <ClientT> S3 client type
* @throws IllegalArgumentException if endpoint is set when FIPS is enabled.
*/ */
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion( private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion(
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) {
@ -263,7 +274,18 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
region = Region.of(configuredRegion); region = Region.of(configuredRegion);
} }
// FIPs? Log it, then reject any attempt to set an endpoint
final boolean fipsEnabled = parameters.isFipsEnabled();
if (fipsEnabled) {
LOG.debug("Enabling FIPS mode");
}
// always setting it guarantees the value is non-null,
// which tests expect.
builder.fipsEnabled(fipsEnabled);
if (endpoint != null) { if (endpoint != null) {
checkArgument(!fipsEnabled,
"%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
builder.endpointOverride(endpoint); builder.endpointOverride(endpoint);
// No region was configured, try to determine it from the endpoint. // No region was configured, try to determine it from the endpoint.
if (region == null) { if (region == null) {

View File

@ -461,6 +461,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
*/ */
private boolean isMultipartCopyEnabled; private boolean isMultipartCopyEnabled;
/**
* Is FIPS enabled?
*/
private boolean fipsEnabled;
/** /**
* A cache of files that should be deleted when the FileSystem is closed * A cache of files that should be deleted when the FileSystem is closed
* or the JVM is exited. * or the JVM is exited.
@ -614,6 +619,8 @@ public void initialize(URI name, Configuration originalConf)
? conf.getTrimmed(AWS_REGION) ? conf.getTrimmed(AWS_REGION)
: accessPoint.getRegion(); : accessPoint.getRegion();
fipsEnabled = conf.getBoolean(FIPS_ENDPOINT, ENDPOINT_FIPS_DEFAULT);
// is this an S3Express store? // is this an S3Express store?
s3ExpressStore = isS3ExpressStore(bucket, endpoint); s3ExpressStore = isS3ExpressStore(bucket, endpoint);
@ -1046,6 +1053,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
.withMultipartThreshold(multiPartThreshold) .withMultipartThreshold(multiPartThreshold)
.withTransferManagerExecutor(unboundedThreadPool) .withTransferManagerExecutor(unboundedThreadPool)
.withRegion(configuredRegion) .withRegion(configuredRegion)
.withFipsEnabled(fipsEnabled)
.withExpressCreateSession( .withExpressCreateSession(
conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT)); conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT));
@ -5521,6 +5529,10 @@ public boolean hasPathCapability(final Path path, final String capability)
case OPTIMIZED_COPY_FROM_LOCAL: case OPTIMIZED_COPY_FROM_LOCAL:
return optimizedCopyFromLocal; return optimizedCopyFromLocal;
// probe for a fips endpoint
case FIPS_ENDPOINT:
return fipsEnabled;
default: default:
return super.hasPathCapability(p, cap); return super.hasPathCapability(p, cap);
} }

View File

@ -176,6 +176,11 @@ final class S3ClientCreationParameters {
*/ */
private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT; private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT;
/**
* Is FIPS enabled?
*/
private boolean fipsEnabled;
/** /**
* List of execution interceptors to include in the chain * List of execution interceptors to include in the chain
* of interceptors in the SDK. * of interceptors in the SDK.
@ -461,5 +466,23 @@ public String toString() {
", expressCreateSession=" + expressCreateSession + ", expressCreateSession=" + expressCreateSession +
'}'; '}';
} }
/**
* Get the FIPS flag.
* @return is fips enabled
*/
public boolean isFipsEnabled() {
return fipsEnabled;
}
/**
* Set builder value.
* @param value new value
* @return the builder
*/
public S3ClientCreationParameters withFipsEnabled(final boolean value) {
fipsEnabled = value;
return this;
}
} }
} }

View File

@ -38,6 +38,7 @@
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS;
import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS;
import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE; import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2; import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2;
@ -272,6 +273,7 @@ private InternalConstants() {
FS_CHECKSUMS, FS_CHECKSUMS,
FS_MULTIPART_UPLOADER, FS_MULTIPART_UPLOADER,
DIRECTORY_LISTING_INCONSISTENT, DIRECTORY_LISTING_INCONSISTENT,
FIPS_ENDPOINT,
// s3 specific // s3 specific
STORE_CAPABILITY_AWS_V2, STORE_CAPABILITY_AWS_V2,

View File

@ -357,12 +357,11 @@ public static class BucketInfo extends S3GuardTool {
public static final String NAME = BUCKET_INFO; public static final String NAME = BUCKET_INFO;
public static final String GUARDED_FLAG = "guarded"; public static final String GUARDED_FLAG = "guarded";
public static final String UNGUARDED_FLAG = "unguarded"; public static final String UNGUARDED_FLAG = "unguarded";
public static final String AUTH_FLAG = "auth";
public static final String NONAUTH_FLAG = "nonauth";
public static final String ENCRYPTION_FLAG = "encryption"; public static final String ENCRYPTION_FLAG = "encryption";
public static final String MAGIC_FLAG = "magic"; public static final String MAGIC_FLAG = "magic";
public static final String MARKERS_FLAG = "markers"; public static final String MARKERS_FLAG = "markers";
public static final String MARKERS_AWARE = "aware"; public static final String MARKERS_AWARE = "aware";
public static final String FIPS_FLAG = "fips";
public static final String PURPOSE = "provide/check information" public static final String PURPOSE = "provide/check information"
+ " about a specific bucket"; + " about a specific bucket";
@ -370,8 +369,7 @@ public static class BucketInfo extends S3GuardTool {
private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n"
+ "\t" + PURPOSE + "\n\n" + "\t" + PURPOSE + "\n\n"
+ "Common options:\n" + "Common options:\n"
+ " -" + AUTH_FLAG + " - Require the S3Guard mode to be \"authoritative\"\n" + " -" + FIPS_FLAG + " - Require the client is using a FIPS endpoint\n"
+ " -" + NONAUTH_FLAG + " - Require the S3Guard mode to be \"non-authoritative\"\n"
+ " -" + MAGIC_FLAG + + " -" + MAGIC_FLAG +
" - Require the S3 filesystem to be support the \"magic\" committer\n" " - Require the S3 filesystem to be support the \"magic\" committer\n"
+ " -" + ENCRYPTION_FLAG + " -" + ENCRYPTION_FLAG
@ -395,7 +393,7 @@ public static class BucketInfo extends S3GuardTool {
+ " directory markers are not deleted"; + " directory markers are not deleted";
public BucketInfo(Configuration conf) { public BucketInfo(Configuration conf) {
super(conf, GUARDED_FLAG, UNGUARDED_FLAG, AUTH_FLAG, NONAUTH_FLAG, MAGIC_FLAG); super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG);
CommandFormat format = getCommandFormat(); CommandFormat format = getCommandFormat();
format.addOptionWithValue(ENCRYPTION_FLAG); format.addOptionWithValue(ENCRYPTION_FLAG);
format.addOptionWithValue(MARKERS_FLAG); format.addOptionWithValue(MARKERS_FLAG);
@ -462,6 +460,10 @@ public int run(String[] args, PrintStream out)
println(out, "\tEndpoint: %s=%s", println(out, "\tEndpoint: %s=%s",
ENDPOINT, ENDPOINT,
StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)"); StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)");
String region = conf.getTrimmed(AWS_REGION, "");
println(out, "\tRegion: %s=%s", AWS_REGION,
StringUtils.isNotEmpty(region) ? region : "(unset)");
String encryption = String encryption =
printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM, printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM,
"none"); "none");
@ -487,12 +489,12 @@ public int run(String[] args, PrintStream out)
FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE);
switch (committer) { switch (committer) {
case COMMITTER_NAME_FILE: case COMMITTER_NAME_FILE:
println(out, "The original 'file' commmitter is active" println(out, "The original 'file' committer is active"
+ " -this is slow and potentially unsafe"); + " -this is slow and potentially unsafe");
break; break;
case InternalCommitterConstants.COMMITTER_NAME_STAGING: case InternalCommitterConstants.COMMITTER_NAME_STAGING:
println(out, "The 'staging' committer is used " println(out, "The 'staging' committer is used "
+ "-prefer the 'directory' committer"); + "-prefer the 'magic' committer");
// fall through // fall through
case COMMITTER_NAME_DIRECTORY: case COMMITTER_NAME_DIRECTORY:
// fall through // fall through
@ -555,13 +557,17 @@ public int run(String[] args, PrintStream out)
processMarkerOption(out, fs, processMarkerOption(out, fs,
getCommandFormat().getOptValue(MARKERS_FLAG)); getCommandFormat().getOptValue(MARKERS_FLAG));
// and check for capabilitities // and check for capabilities
println(out, "%nStore Capabilities"); println(out, "%nStore Capabilities");
for (String capability : S3A_DYNAMIC_CAPABILITIES) { for (String capability : S3A_DYNAMIC_CAPABILITIES) {
out.printf("\t%s %s%n", capability, out.printf("\t%s %s%n", capability,
fs.hasPathCapability(root, capability)); fs.hasPathCapability(root, capability));
} }
println(out, ""); println(out, "");
if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, FIPS_ENDPOINT)) {
throw badState("FIPS endpoint was required but the filesystem is not using it");
}
// and finally flush the output and report a success. // and finally flush the output and report a success.
out.flush(); out.flush();
return SUCCESS; return SUCCESS;

View File

@ -0,0 +1,477 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
# Connecting to an Amazon S3 Bucket through the S3A Connector
<!-- MACRO{toc|fromDepth=0|toDepth=2} -->
1. This document covers how to connect to and authenticate with S3 stores, primarily AWS S3.
2. There have been changes in this mechanism between the V1 and V2 SDKs; in particular, specifying
the region is now preferred to specifying the regional S3 endpoint.
3. For connecting to third-party stores, please read [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document.
## <a name="foundational"></a> Foundational Concepts
### <a name="regions"></a> AWS Regions and Availability Zones
AWS provides storage, compute and other services around the world, in *regions*.
Data in S3 is stored in *buckets*; each bucket resides in a single region.
There are some "special" regions: China, AWS GovCloud.
It is *believed* that the S3A connector works in these places, at least to the extent that nobody has complained about it not working.
### <a name="endpoints"></a> Endpoints
The S3A connector connects to Amazon S3 storage over HTTPS connections, either directly or through an HTTP proxy.
HTTP HEAD and GET, PUT, POST and DELETE requests are invoked to perform different read/write operations against the store.
There are multiple ways to connect to an S3 bucket:
* To an [S3 Endpoint](https://docs.aws.amazon.com/general/latest/gr/s3.html); an HTTPS server hosted by Amazon or a third party.
* To a FIPS-compliant S3 Endpoint.
* To an AWS S3 [Access Point](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points.html).
* Through a VPC connection, [AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html).
* AWS [Outposts](https://aws.amazon.com/outposts/).
The S3A connector supports all of these; S3 Endpoints are the primary mechanism used, either explicitly declared or automatically determined from the declared region of the bucket.
Not supported:
* AWS [Snowball](https://aws.amazon.com/snowball/).
As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client does this through
the Apache [HttpClient library](https://hc.apache.org/index.html).
### <a name="third-party"></a> Third party stores
Third-party stores implementing the S3 API are also supported.
These often only implement a subset of the S3 API; not all features are available.
If TLS authentication is used, then the HTTPS certificates for the private stores
_MUST_ be installed on the JVMs on hosts within the Hadoop cluster.
See [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document.
## <a name="settings"></a> Connection Settings
There are three core settings to connect to an S3 store: endpoint, region and whether or not to use path style access.
```xml
<property>
<name>fs.s3a.endpoint</name>
<description>AWS S3 endpoint to connect to. An up-to-date list is
provided in the AWS Documentation: regions and endpoints. Without this
property, the standard region (s3.amazonaws.com) is assumed.
</description>
</property>
<property>
<name>fs.s3a.endpoint.region</name>
<value>REGION</value>
<description>AWS Region of the data</description>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>false</value>
<description>Enable S3 path style access by disabling the default virtual hosting behaviour.
Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party stores.
Default: false.
</description>
</property>
```
Historically the S3A connector has preferred the endpoint as defined by the option `fs.s3a.endpoint`.
With the move to the AWS V2 SDK, there is more emphasis on the region, set by the `fs.s3a.endpoint.region` option.
Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store.
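For example, a region-only per-bucket setup for a hypothetical AWS-hosted bucket `example-data` (no endpoint declared) might look like this:

```xml
<property>
  <name>fs.s3a.bucket.example-data.endpoint.region</name>
  <value>us-west-2</value>
  <description>Region of the hypothetical example-data bucket;
    the endpoint is derived from it automatically.</description>
</property>
```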
### <a name="timeouts"></a> Network timeouts
See [Timeouts](performance.html#timeouts).
### <a name="networking"></a> Low-level Network Options
```xml
<property>
<name>fs.s3a.connection.maximum</name>
<value>200</value>
<description>Controls the maximum number of simultaneous connections to S3.
This must be bigger than the value of fs.s3a.threads.max so as to stop
threads being blocked waiting for new HTTPS connections.
</description>
</property>
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>true</value>
<description>
Enables or disables SSL connections to AWS services.
</description>
</property>
<property>
<name>fs.s3a.ssl.channel.mode</name>
<value>Default_JSSE</value>
<description>
TLS implementation and cipher options.
Values: OpenSSL, Default, Default_JSSE, Default_JSSE_with_GCM
Default_JSSE is not truly the default JSSE implementation because
the GCM cipher is disabled when running on Java 8. However, the name
was not changed in order to preserve backwards compatibility. Instead,
a new mode called Default_JSSE_with_GCM delegates to the default JSSE
implementation with no changes to the list of enabled ciphers.
OpenSSL requires the wildfly JAR on the classpath and a compatible installation of the openssl binaries.
It is often faster than the JVM libraries, but also trickier to
use.
</description>
</property>
<property>
<name>fs.s3a.socket.send.buffer</name>
<value>8192</value>
<description>
Socket send buffer hint to amazon connector. Represented in bytes.
</description>
</property>
<property>
<name>fs.s3a.socket.recv.buffer</name>
<value>8192</value>
<description>
Socket receive buffer hint to amazon connector. Represented in bytes.
</description>
</property>
```
### <a name="proxies"></a> Proxy Settings
Connections to S3A stores can be made through an HTTP or HTTPS proxy.
```xml
<property>
<name>fs.s3a.proxy.host</name>
<description>
Hostname of the (optional) proxy server for S3 connections.
</description>
</property>
<property>
<name>fs.s3a.proxy.ssl.enabled</name>
<value>false</value>
<description>
Does the proxy use a TLS connection?
</description>
</property>
<property>
<name>fs.s3a.proxy.port</name>
<description>
Proxy server port. If this property is not set
but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
the value of fs.s3a.connection.ssl.enabled).
</description>
</property>
<property>
<name>fs.s3a.proxy.username</name>
<description>Username for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.password</name>
<description>Password for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.domain</name>
<description>Domain for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.workstation</name>
<description>Workstation for authenticating with proxy server.</description>
</property>
```
Sometimes the proxy can be a source of problems, especially if HTTP connections are kept
in the connection pool for some time.
Experiment with the values of `fs.s3a.connection.ttl` and `fs.s3a.connection.request.timeout`
if long-lived connections have problems.
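As a starting point for such experiments, both options accept the usual time suffixes; the values below are an illustrative sketch, not recommendations:

```xml
<property>
  <name>fs.s3a.connection.ttl</name>
  <value>5m</value>
  <description>Illustrative value: retire pooled connections after five minutes.</description>
</property>

<property>
  <name>fs.s3a.connection.request.timeout</name>
  <value>15m</value>
  <description>Illustrative value: cap any single AWS service call at fifteen minutes;
    this must remain longer than the slowest expected upload or copy.</description>
</property>
```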
## <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to access data round the world
S3 Buckets are hosted in different "regions", the default being "US-East-1".
The S3A client talks to this region by default, issuing HTTP requests
to the server `s3.amazonaws.com`.
S3A can work with buckets from any region. Each region has its own
S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
1. Applications running in EC2 infrastructure do not pay for IO to/from
*local S3 buckets*. They will be billed for access to remote buckets. Always
use local buckets and local copies of data, wherever possible.
2. With the V4 signing protocol, AWS requires the explicit region endpoint
to be used —hence S3A must be configured to use the specific endpoint. This
is done in the configuration option `fs.s3a.endpoint`.
3. All endpoints other than the default endpoint only support interaction
with buckets local to that S3 instance.
4. Standard S3 buckets support "cross-region" access, where use of the original `us-east-1`
endpoint allows access to the data, but newer storage types, particularly S3 Express, are
not supported.
If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error,
or as a 400 Bad Request: take these as cues to check the endpoint setting of
a bucket.
The up-to-date list of regions is [available online](https://docs.aws.amazon.com/general/latest/gr/s3.html).
This list can be used to specify the endpoint of individual buckets, for example
for buckets in the US West (Oregon) and EU/Ireland regions.
```xml
<property>
<name>fs.s3a.bucket.landsat-pds.endpoint</name>
<value>s3-us-west-2.amazonaws.com</value>
</property>
<property>
<name>fs.s3a.bucket.eu-dataset.endpoint</name>
<value>s3.eu-west-1.amazonaws.com</value>
</property>
```
Declaring the region for the data is simpler, as it avoids having to look up the full URL and worrying about historical quirks of regional endpoint hostnames.
```xml
<property>
<name>fs.s3a.bucket.landsat-pds.endpoint.region</name>
<value>us-west-2</value>
<description>The endpoint for s3a://landsat-pds URLs</description>
</property>
<property>
<name>fs.s3a.bucket.eu-dataset.endpoint.region</name>
<value>eu-west-1</value>
</property>
```
## <a name="privatelink"></a> AWS PrivateLink
[AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html) allows for a private connection to a bucket to be defined, with network access rules managing how a bucket can be accessed.
1. Follow the documentation to create the private link
2. Retrieve the DNS name from the console, such as `vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com`
3. Convert this to an endpoint URL by prefixing "https://bucket."
4. Declare this as the bucket endpoint and switch to path-style access.
5. Declare the region: there is no automated determination of the region from
the `vpce` URL.
```xml
<property>
<name>fs.s3a.bucket.example-usw2.endpoint</name>
<value>https://bucket.vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com/</value>
</property>
<property>
<name>fs.s3a.bucket.example-usw2.path.style.access</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.bucket.example-usw2.endpoint.region</name>
<value>us-west-2</value>
</property>
```
## <a name="fips"></a> Federal Information Processing Standards (FIPS) Endpoints
It is possible to use [FIPS-compliant](https://www.nist.gov/itl/fips-general-information) endpoints which
support a restricted subset of TLS algorithms.
Amazon provide a specific set of [FIPS endpoints](https://aws.amazon.com/compliance/fips/)
to use so callers can be confident that the network communication is compliant with the standard:
non-compliant algorithms are unavailable.
The boolean option `fs.s3a.endpoint.fips` (default `false`) switches the S3A connector to using the FIPS endpoint of a region.
```xml
<property>
<name>fs.s3a.endpoint.fips</name>
<value>true</value>
<description>Use the FIPS endpoint</description>
</property>
```
For a single bucket:
```xml
<property>
<name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
<value>true</value>
<description>Use the FIPS endpoint for the landsat dataset</description>
</property>
```
If this option is `true`, the endpoint option `fs.s3a.endpoint` MUST NOT be set:
```
A custom endpoint cannot be combined with FIPS: https://s3.eu-west-2.amazonaws.com
```
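With FIPS enabled, declare only the region and leave the endpoint unset; a sketch for a hypothetical bucket `example-ohio` in a region which offers FIPS endpoints:

```xml
<property>
  <name>fs.s3a.bucket.example-ohio.endpoint.region</name>
  <value>us-east-2</value>
</property>

<property>
  <name>fs.s3a.bucket.example-ohio.endpoint.fips</name>
  <value>true</value>
</property>
```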
The SDK calculates the FIPS-specific endpoint without any awareness as to whether FIPS is supported by a region. The first attempt to interact with the service will fail:
```
java.net.UnknownHostException: software.amazon.awssdk.core.exception.SdkClientException:
Received an UnknownHostException when attempting to interact with a service.
See cause for the exact endpoint that is failing to resolve.
If this is happening on an endpoint that previously worked,
there may be a network connectivity issue or your DNS cache
could be storing endpoints for too long.:
example-london-1.s3-fips.eu-west-2.amazonaws.com
```
*Important* OpenSSL and FIPS endpoints
Linux distributions with a FIPS-compliant SSL library may not be compatible with wildfly.
Always use the JDK SSL implementation unless you are confident that the library
is compatible, or wish to experiment with the settings outside of production deployments.
```xml
<property>
<name>fs.s3a.ssl.channel.mode</name>
<value>Default_JSSE</value>
</property>
```
## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
S3A supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which
improves VPC integration with S3 and simplifies your data's permission model because different
policies can be applied now on the Access Point level. For more information about why to use and
how to create them make sure to read the official documentation.
Accessing data through an access point is done by using its ARN, as opposed to just the bucket name.
You can set the Access Point ARN property using the following per bucket configuration property:
```xml
<property>
<name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
<value> {ACCESSPOINT_ARN_HERE} </value>
<description>Configure S3a traffic to use this AccessPoint</description>
</property>
```
This configures access to the `sample-bucket` bucket for S3A, to go through the
new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your
configured ARN when getting data from S3 instead of your bucket.
_the name of the bucket used in the s3a:// URLs is irrelevant; it is not used when connecting with the store_
Example
```xml
<property>
<name>fs.s3a.bucket.example-ap.accesspoint.arn</name>
<value>arn:aws:s3:eu-west-2:152813717728:accesspoint/ap-example-london</value>
<description>AccessPoint bound to bucket name example-ap</description>
</property>
```
The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access
Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access
to known sources of data defined through Access Points. In case there is a need to access a bucket
directly (without Access Points) then you can use per bucket overrides to disable this setting on a
bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
```xml
<!-- Require access point only access -->
<property>
<name>fs.s3a.accesspoint.required</name>
<value>true</value>
</property>
<!-- Disable it on a per-bucket basis if needed -->
<property>
<name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
<value>false</value>
</property>
```
Before using Access Points make sure you're not impacted by the following:
- `ListObjectsV1` is not supported; it is also deprecated on AWS S3 for performance reasons;
- The endpoint for S3 requests will automatically change to use
`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While
considering endpoints, if you have any custom signers that use the host endpoint property make
sure to update them if needed;
## <a name="debugging"></a> Debugging network problems
The `storediag` command within the utility [cloudstore](https://github.com/exampleoughran/cloudstore)
JAR is recommended as the way to view and print settings.
If `storediag` doesn't connect to your S3 store, *nothing else will*.
## <a name="common-problems"></a> Common Sources of Connection Problems
Based on the experience of people who field support calls, here are
some of the main connectivity issues which cause problems.
### <a name="inconsistent-config"></a> Inconsistent configuration across a cluster
All hosts in the cluster need to have the configuration secrets;
local environment variables are not enough.
If HTTPS/TLS is used for a private store, the relevant certificates MUST be installed everywhere.
For applications such as distcp, the options need to be passed with the job.
### <a name="public-private-mixup"></a> Confusion between public/private S3 Stores.
If your cluster is configured to use a private store, AWS-hosted buckets are not visible.
If you wish to read data in a different store, you need to change the endpoint.
Private S3 stores generally expect path style access.
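A hedged sketch of a per-bucket override for such a store; the hostname, bucket name and region value here are hypothetical:

```xml
<property>
  <name>fs.s3a.bucket.internal-data.endpoint</name>
  <value>https://storage.example.internal</value>
</property>

<property>
  <name>fs.s3a.bucket.internal-data.path.style.access</name>
  <value>true</value>
</property>

<property>
  <name>fs.s3a.bucket.internal-data.endpoint.region</name>
  <value>us-east-1</value>
  <description>Many third-party stores accept any region value; check the store's documentation.</description>
</property>
```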
### <a name="region-misconfigure"></a> Region and endpoints misconfigured
These usually surface rapidly and with meaningful messages.
Region errors generally surface as
* `UnknownHostException`
* `AWSRedirectException` "Received permanent redirect response to region"
Endpoint configuration problems can be more varied, as they are just HTTPS URLs.
### <a name="wildfly"></a> Wildfly/OpenSSL Brittleness
When it works, it is fast. But it is fussy about OpenSSL implementations, TLS protocols and more.
Because it uses the native openssl binaries, operating system updates can trigger regressions.
Disabling it should be the first step to troubleshooting any TLS problems.
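To fall back to the JDK TLS implementation, a minimal sketch:

```xml
<property>
  <name>fs.s3a.ssl.channel.mode</name>
  <value>Default_JSSE</value>
</property>
```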
### <a name="proxy-misconfiguration"></a> Proxy setup
If there is a proxy, set it up correctly.

View File

@ -33,6 +33,7 @@ full details.
## <a name="documents"></a> Documents ## <a name="documents"></a> Documents
* [Connecting](./connecting.html)
* [Encryption](./encryption.html) * [Encryption](./encryption.html)
* [Performance](./performance.html) * [Performance](./performance.html)
* [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html) * [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html)
@ -223,6 +224,10 @@ Do not inadvertently share these credentials through means such as:
If you do any of these: change your credentials immediately! If you do any of these: change your credentials immediately!
## Connecting to Amazon S3 or a third-party store
See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md).
## <a name="authenticating"></a> Authenticating with S3 ## <a name="authenticating"></a> Authenticating with S3
Except when interacting with public S3 buckets, the S3A client Except when interacting with public S3 buckets, the S3A client
@ -835,61 +840,15 @@ Here are some the S3A properties for use in production.
</property> </property>
<property> <property>
<name>fs.s3a.connection.ssl.enabled</name> <name>fs.s3a.connection.maximum</name>
<value>true</value> <value>96</value>
<description>Enables or disables SSL connections to AWS services. <description>Controls the maximum number of simultaneous connections to S3.
Also sets the default port to use for the s3a proxy settings, This must be bigger than the value of fs.s3a.threads.max so as to stop
when not explicitly set in fs.s3a.proxy.port.</description> threads being blocked waiting for new HTTPS connections.
</property> Why not equal? The AWS SDK transfer manager also uses these connections.
<property>
<name>fs.s3a.endpoint</name>
<description>AWS S3 endpoint to connect to. An up-to-date list is
provided in the AWS Documentation: regions and endpoints. Without this
property, the standard region (s3.amazonaws.com) is assumed.
</description> </description>
</property> </property>
<property>
<name>fs.s3a.path.style.access</name>
<value>false</value>
<description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
</description>
</property>
<property>
<name>fs.s3a.proxy.host</name>
<description>Hostname of the (optional) proxy server for S3 connections.</description>
</property>
<property>
<name>fs.s3a.proxy.port</name>
<description>Proxy server port. If this property is not set
but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
the value of fs.s3a.connection.ssl.enabled).</description>
</property>
<property>
<name>fs.s3a.proxy.username</name>
<description>Username for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.password</name>
<description>Password for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.domain</name>
<description>Domain for authenticating with proxy server.</description>
</property>
<property>
<name>fs.s3a.proxy.workstation</name>
<description>Workstation for authenticating with proxy server.</description>
</property>
<property> <property>
<name>fs.s3a.attempts.maximum</name> <name>fs.s3a.attempts.maximum</name>
<value>5</value> <value>5</value>
@ -1005,14 +964,6 @@ Here are some the S3A properties for use in production.
implementations can still be used</description> implementations can still be used</description>
</property> </property>
<property>
<name>fs.s3a.accesspoint.required</name>
<value>false</value>
<description>Require that all S3 access is made through Access Points and not through
buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set
of buckets.</description>
</property>
<property> <property>
<name>fs.s3a.block.size</name> <name>fs.s3a.block.size</name>
<value>32M</value> <value>32M</value>
@ -1218,23 +1169,6 @@ Here are some the S3A properties for use in production.
</description> </description>
</property> </property>
<property>
<name>fs.s3a.connection.request.timeout</name>
<value>0</value>
<description>
Time out on HTTP requests to the AWS service; 0 means no timeout.
Measured in seconds; the usual time suffixes are all supported
Important: this is the maximum duration of any AWS service call,
including upload and copy operations. If non-zero, it must be larger
than the time to upload multi-megabyte blocks to S3 from the client,
and to rename many-GB files. Use with care.
Values that are larger than Integer.MAX_VALUE milliseconds are
converged to Integer.MAX_VALUE milliseconds
</description>
</property>
<property> <property>
<name>fs.s3a.etag.checksum.enabled</name> <name>fs.s3a.etag.checksum.enabled</name>
<value>false</value> <value>false</value>
@ -1699,179 +1633,6 @@ For a site configuration of:
The bucket "nightly" will be encrypted with SSE-KMS using the KMS key The bucket "nightly" will be encrypted with SSE-KMS using the KMS key
`arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f` `arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f`
### <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to access data round the world
S3 Buckets are hosted in different "regions", the default being "US-East".
The S3A client talks to this region by default, issuing HTTP requests
to the server `s3.amazonaws.com`.
S3A can work with buckets from any region. Each region has its own
S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
1. Applications running in EC2 infrastructure do not pay for IO to/from
*local S3 buckets*. They will be billed for access to remote buckets. Always
use local buckets and local copies of data, wherever possible.
1. The default S3 endpoint can support data IO with any bucket when the V1 request
signing protocol is used.
1. When the V4 signing protocol is used, AWS requires the explicit region endpoint
to be used —hence S3A must be configured to use the specific endpoint. This
is done in the configuration option `fs.s3a.endpoint`.
1. All endpoints other than the default endpoint only support interaction
with buckets local to that S3 instance.
While it is generally simpler to use the default endpoint, working with
V4-signing-only regions (Frankfurt, Seoul) requires the endpoint to be identified.
Expect better performance from direct connections —traceroute will give you some insight.
If the wrong endpoint is used, the request may fail. This may be reported as a 301/redirect error,
or as a 400 Bad Request: take these as cues to check the endpoint setting of
a bucket.
Here is a list of properties defining all AWS S3 regions, current as of June 2017:
```xml
<!--
This is the default endpoint, which can be used to interact
with any v2 region.
-->
<property>
<name>central.endpoint</name>
<value>s3.amazonaws.com</value>
</property>
<property>
<name>canada.endpoint</name>
<value>s3.ca-central-1.amazonaws.com</value>
</property>
<property>
<name>frankfurt.endpoint</name>
<value>s3.eu-central-1.amazonaws.com</value>
</property>
<property>
<name>ireland.endpoint</name>
<value>s3-eu-west-1.amazonaws.com</value>
</property>
<property>
<name>london.endpoint</name>
<value>s3.eu-west-2.amazonaws.com</value>
</property>
<property>
<name>mumbai.endpoint</name>
<value>s3.ap-south-1.amazonaws.com</value>
</property>
<property>
<name>ohio.endpoint</name>
<value>s3.us-east-2.amazonaws.com</value>
</property>
<property>
<name>oregon.endpoint</name>
<value>s3-us-west-2.amazonaws.com</value>
</property>
<property>
<name>sao-paolo.endpoint</name>
<value>s3-sa-east-1.amazonaws.com</value>
</property>
<property>
<name>seoul.endpoint</name>
<value>s3.ap-northeast-2.amazonaws.com</value>
</property>
<property>
<name>singapore.endpoint</name>
<value>s3-ap-southeast-1.amazonaws.com</value>
</property>
<property>
<name>sydney.endpoint</name>
<value>s3-ap-southeast-2.amazonaws.com</value>
</property>
<property>
<name>tokyo.endpoint</name>
<value>s3-ap-northeast-1.amazonaws.com</value>
</property>
<property>
<name>virginia.endpoint</name>
<value>${central.endpoint}</value>
</property>
```
This list can be used to specify the endpoint of individual buckets, for example
for buckets in the central and EU/Ireland endpoints.
```xml
<property>
<name>fs.s3a.bucket.landsat-pds.endpoint</name>
<value>${central.endpoint}</value>
<description>The endpoint for s3a://landsat-pds URLs</description>
</property>
<property>
<name>fs.s3a.bucket.eu-dataset.endpoint</name>
<value>${ireland.endpoint}</value>
<description>The endpoint for s3a://eu-dataset URLs</description>
</property>
```
Why explicitly declare a bucket bound to the central endpoint? It ensures
that if the default endpoint is changed to a new region, data store in
US-east is still reachable.
## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
S3a now supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which
improves VPC integration with S3 and simplifies your data's permission model because different
policies can be applied now on the Access Point level. For more information about why to use and
how to create them make sure to read the official documentation.
Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name.
You can set the Access Point ARN property using the following per bucket configuration property:
```xml
<property>
<name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
<value> {ACCESSPOINT_ARN_HERE} </value>
<description>Configure S3a traffic to use this AccessPoint</description>
</property>
```
This configures access to the `sample-bucket` bucket for S3A, to go through the
new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your
configured ARN when getting data from S3 instead of your bucket.
The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access
Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access
to known sources of data defined through Access Points. In case there is a need to access a bucket
directly (without Access Points) then you can use per bucket overrides to disable this setting on a
bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
```xml
<!-- Require access point only access -->
<property>
<name>fs.s3a.accesspoint.required</name>
<value>true</value>
</property>
<!-- Disable it on a per-bucket basis if needed -->
<property>
<name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
<value>false</value>
</property>
```
Before using Access Points make sure you're not impacted by the following:
- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons;
- The endpoint for S3 requests will automatically change from `s3.amazonaws.com` to use
`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While
considering endpoints, if you have any custom signers that use the host endpoint property make
sure to update them if needed;
## <a name="requester_pays"></a>Requester Pays buckets ## <a name="requester_pays"></a>Requester Pays buckets
S3A supports buckets with S3A supports buckets with

View File

@ -218,6 +218,7 @@ everything uses the same HTTP connection pool.
| `fs.s3a.executor.capacity` | `16` | Maximum threads for any single operation | | `fs.s3a.executor.capacity` | `16` | Maximum threads for any single operation |
| `fs.s3a.max.total.tasks` | `16` | Extra tasks which can be queued excluding prefetching operations | | `fs.s3a.max.total.tasks` | `16` | Extra tasks which can be queued excluding prefetching operations |
### <a name="timeouts"></a> Timeouts.
Network timeout options can be tuned to make the client fail faster *or* retry more. Network timeout options can be tuned to make the client fail faster *or* retry more.
The choice is yours. Generally recovery is better, but sometimes fail-fast is more useful. The choice is yours. Generally recovery is better, but sometimes fail-fast is more useful.

View File

@ -132,20 +132,17 @@ This auditing information can be used to identify opportunities to reduce load.
Prints and optionally checks the status of a bucket. Prints and optionally checks the status of a bucket.
```bash ```bash
hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET hadoop s3guard bucket-info [-fips] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET
``` ```
Options Options
| argument | meaning | | argument | meaning |
|-----------|-------------| |----------------------|---------------------------------------------------------------------|
| `-guarded` | Require S3Guard to be enabled. This will now always fail | | `-fips` | Require the FIPS endpoint to be in use |
| `-unguarded` | Require S3Guard to be disabled. This will now always succeed | | `-magic` | Require the S3 filesystem to be support the "magic" committer |
| `-auth` | Require the S3Guard mode to be "authoritative". This will now always fail | | `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` |
| `-nonauth` | Require the S3Guard mode to be "non-authoritative". This will now always fail | | `-encryption <type>` | Require a specific encryption algorithm |
| `-magic` | Require the S3 filesystem to be support the "magic" committer |
| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` |
| `-encryption <type>` | Require a specific encryption algorithm |
The server side encryption options are not directly related to S3Guard, but The server side encryption options are not directly related to S3Guard, but
it is often convenient to check them at the same time. it is often convenient to check them at the same time.

View File

@ -39,6 +39,7 @@
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED;
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
@ -138,6 +139,7 @@ private Configuration createConfigurationWithProbe(final int probe) {
removeBaseAndBucketOverrides(conf, removeBaseAndBucketOverrides(conf,
S3A_BUCKET_PROBE, S3A_BUCKET_PROBE,
ENDPOINT, ENDPOINT,
FIPS_ENDPOINT,
AWS_REGION, AWS_REGION,
PATH_STYLE_ACCESS); PATH_STYLE_ACCESS);
conf.setInt(S3A_BUCKET_PROBE, probe); conf.setInt(S3A_BUCKET_PROBE, probe);

View File

@ -20,7 +20,6 @@
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.nio.file.AccessDeniedException; import java.nio.file.AccessDeniedException;
import java.util.ArrayList; import java.util.ArrayList;
@ -36,16 +35,17 @@
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
import software.amazon.awssdk.services.s3.model.HeadBucketResponse;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
import static org.apache.hadoop.io.IOUtils.closeStream; import static org.apache.hadoop.io.IOUtils.closeStream;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/** /**
@ -82,6 +82,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor";
/** /**
* New FS instance which will be closed in teardown. * New FS instance which will be closed in teardown.
*/ */
@ -134,10 +136,9 @@ public void testEndpointOverride() throws Throwable {
describe("Create a client with a configured endpoint"); describe("Create a client with a configured endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2); S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test @Test
@ -145,10 +146,9 @@ public void testCentralEndpoint() throws Throwable {
describe("Create a client with the central endpoint"); describe("Create a client with the central endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1); S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test @Test
@ -156,21 +156,40 @@ public void testWithRegionConfig() throws Throwable {
describe("Create a client with a configured region"); describe("Create a client with a configured region");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2); S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test
public void testWithFips() throws Throwable {
describe("Create a client with fips enabled");
S3Client client = createS3Client(getConfiguration(),
null, EU_WEST_2, EU_WEST_2, true);
expectInterceptorException(client);
}
/**
* Attempting to create a client with fips enabled and an endpoint specified
* fails during client construction.
*/
@Test
public void testWithFipsAndEndpoint() throws Throwable {
describe("Create a client with fips and an endpoint");
intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () ->
createS3Client(getConfiguration(), CENTRAL_ENDPOINT, null, US_EAST_1, true));
}
@Test
public void testEUWest2Endpoint() throws Throwable { public void testEUWest2Endpoint() throws Throwable {
describe("Create a client with the eu west 2 endpoint"); describe("Create a client with the eu west 2 endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2); S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test @Test
@ -178,10 +197,9 @@ public void testWithRegionAndEndpointConfig() throws Throwable {
describe("Test that when both region and endpoint are configured, region takes precedence"); describe("Test that when both region and endpoint are configured, region takes precedence");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2); S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test @Test
@ -189,21 +207,43 @@ public void testWithChinaEndpoint() throws Throwable {
describe("Test with a china endpoint"); describe("Test with a china endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1); S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); }
/**
* Expect an exception to be thrown by the interceptor with the message
* {@link #EXCEPTION_THROWN_BY_INTERCEPTOR}.
* @param client client to issue a head request against.
* @return the expected exception.
* @throws Exception any other exception.
*/
private AwsServiceException expectInterceptorException(final S3Client client)
throws Exception {
return intercept(AwsServiceException.class, EXCEPTION_THROWN_BY_INTERCEPTOR,
() -> head(client));
}
/**
* Issue a head request against the bucket.
* @param client client to use
* @return the response.
*/
private HeadBucketResponse head(final S3Client client) {
return client.headBucket(
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build());
} }
@Test @Test
public void testWithGovCloudEndpoint() throws Throwable { public void testWithGovCloudEndpoint() throws Throwable {
describe("Test with a gov cloud endpoint"); describe("Test with a gov cloud endpoint; enable fips");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1); S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
@Test @Test
@ -212,19 +252,20 @@ public void testWithVPCE() throws Throwable {
describe("Test with vpc endpoint"); describe("Test with vpc endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2); S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2, false);
intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( expectInterceptorException(client);
HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
} }
class RegionInterceptor implements ExecutionInterceptor { private final class RegionInterceptor implements ExecutionInterceptor {
private String endpoint; private final String endpoint;
private String region; private final String region;
private final boolean isFips;
RegionInterceptor(String endpoint, String region) { RegionInterceptor(String endpoint, String region, final boolean isFips) {
this.endpoint = endpoint; this.endpoint = endpoint;
this.region = region; this.region = region;
this.isFips = isFips;
} }
@Override @Override
@ -249,8 +290,15 @@ public void beforeExecution(Context.BeforeExecution context,
executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString()) executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString())
.describedAs("Incorrect region set").isEqualTo(region); .describedAs("Incorrect region set").isEqualTo(region);
// verify the fips state matches expectation.
Assertions.assertThat(executionAttributes.getAttribute(
AwsExecutionAttribute.FIPS_ENDPOINT_ENABLED))
.describedAs("Incorrect FIPS flag set in execution attributes")
.isNotNull()
.isEqualTo(isFips);
// We don't actually want to make a request, so exit early. // We don't actually want to make a request, so exit early.
throw AwsServiceException.builder().message("Exception thrown by interceptor").build(); throw AwsServiceException.builder().message(EXCEPTION_THROWN_BY_INTERCEPTOR).build();
} }
} }
@ -261,17 +309,17 @@ public void beforeExecution(Context.BeforeExecution context,
* @param conf configuration to use. * @param conf configuration to use.
* @param endpoint endpoint. * @param endpoint endpoint.
* @param expectedRegion the region that should be set in the client. * @param expectedRegion the region that should be set in the client.
* @param isFips is this a FIPS endpoint?
* @return the client. * @return the client.
* @throws URISyntaxException parse problems.
* @throws IOException IO problems * @throws IOException IO problems
*/ */
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")
private S3Client createS3Client(Configuration conf, private S3Client createS3Client(Configuration conf,
String endpoint, String configuredRegion, String expectedRegion) String endpoint, String configuredRegion, String expectedRegion, boolean isFips)
throws IOException { throws IOException {
List<ExecutionInterceptor> interceptors = new ArrayList<>(); List<ExecutionInterceptor> interceptors = new ArrayList<>();
interceptors.add(new RegionInterceptor(endpoint, expectedRegion)); interceptors.add(new RegionInterceptor(endpoint, expectedRegion, isFips));
DefaultS3ClientFactory factory DefaultS3ClientFactory factory
= new DefaultS3ClientFactory(); = new DefaultS3ClientFactory();
@ -283,8 +331,8 @@ private S3Client createS3Client(Configuration conf,
.withMetrics(new EmptyS3AStatisticsContext() .withMetrics(new EmptyS3AStatisticsContext()
.newStatisticsFromAwsSdk()) .newStatisticsFromAwsSdk())
.withExecutionInterceptors(interceptors) .withExecutionInterceptors(interceptors)
.withRegion(configuredRegion); .withRegion(configuredRegion)
.withFipsEnabled(isFips);
S3Client client = factory.createS3Client( S3Client client = factory.createS3Client(
getFileSystem().getUri(), getFileSystem().getUri(),

View File

@ -33,6 +33,8 @@
import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM;
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt;
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads;
@ -97,6 +99,20 @@ public void testStoreInfo() throws Throwable {
LOG.info("Exec output=\n{}", output); LOG.info("Exec output=\n{}", output);
} }
@Test
public void testStoreInfoFips() throws Throwable {
final S3AFileSystem fs = getFileSystem();
if (!fs.hasPathCapability(new Path("/"), FIPS_ENDPOINT)) {
skip("FIPS not enabled");
}
S3GuardTool.BucketInfo cmd =
toClose(new S3GuardTool.BucketInfo(fs.getConf()));
String output = exec(cmd, cmd.getName(),
"-" + BucketInfo.FIPS_FLAG,
fs.getUri().toString());
LOG.info("Exec output=\n{}", output);
}
private final static String UPLOAD_NAME = "test-upload"; private final static String UPLOAD_NAME = "test-upload";
@Test @Test

View File

@ -56,6 +56,12 @@
<description>Do not add the referrer header to landsat operations</description> <description>Do not add the referrer header to landsat operations</description>
</property> </property>
<property>
<name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
<value>true</value>
<description>Use the fips endpoint</description>
</property>
<!-- Per-bucket configurations: usgs-landsat --> <!-- Per-bucket configurations: usgs-landsat -->
<property> <property>
<name>fs.s3a.bucket.usgs-landsat.endpoint.region</name> <name>fs.s3a.bucket.usgs-landsat.endpoint.region</name>