From d37885379009d515fcee33056b9b8b1a7392a40b Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 16 Jan 2024 14:16:12 +0000 Subject: [PATCH] HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints (#6277) Adds a new option `fs.s3a.endpoint.fips` to switch the SDK client to use FIPS endpoints, as an alternative to explicitly declaring them. * The option is available as a path capability for probes. * SDK v2 itself doesn't know that some regions don't have FIPS endpoints * SDK only fails with endpoint + fips flag as a retried exception; wit this change the S3A client should fail fast. PR fails fast. * Adds a new "connecting.md" doc; moves existing docs there and restructures. * New Tests in ITestS3AEndpointRegion bucket-info command support: * added to list of path capabilities * added -fips flag and test for explicit probe * also now prints bucket region * and removed some of the obsolete s3guard options * updated docs Contributed by Steve Loughran --- .../org/apache/hadoop/fs/s3a/Constants.java | 9 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 22 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 12 + .../apache/hadoop/fs/s3a/S3ClientFactory.java | 23 + .../hadoop/fs/s3a/impl/InternalConstants.java | 2 + .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 22 +- .../markdown/tools/hadoop-aws/connecting.md | 477 ++++++++++++++++++ .../site/markdown/tools/hadoop-aws/index.md | 261 +--------- .../markdown/tools/hadoop-aws/performance.md | 1 + .../site/markdown/tools/hadoop-aws/s3guard.md | 17 +- .../fs/s3a/ITestS3ABucketExistence.java | 2 + .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 124 +++-- .../fs/s3a/s3guard/ITestS3GuardTool.java | 16 + .../src/test/resources/core-site.xml | 6 + 14 files changed, 688 insertions(+), 306 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c1c12b5948..e33f762cdf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1335,6 +1335,15 @@ private Constants() { */ public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; + /** + * Is the endpoint a FIPS endpoint? + * Can be queried as a path capability. + * Value {@value}. + */ + public static final String FIPS_ENDPOINT = "fs.s3a.endpoint.fips"; + + public static final boolean ENDPOINT_FIPS_DEFAULT = false; + /** * Require that all S3 access is made through Access Points. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 05ac5ef921..0fde93e654 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -22,6 +22,7 @@ import java.net.URI; import java.net.URISyntaxException; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,6 +55,7 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT; @@ -63,6 +65,7 @@ import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner; import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4; +import static org.apache.hadoop.util.Preconditions.checkArgument; /** @@ -102,6 +105,13 @@ public class DefaultS3ClientFactory extends Configured /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); + /** + * Error message when an endpoint is set with FIPS enabled: {@value}. + */ + @VisibleForTesting + public static final String ERROR_ENDPOINT_WITH_FIPS = + "An endpoint cannot set when " + FIPS_ENDPOINT + " is true"; + @Override public S3Client createS3Client( final URI uri, @@ -248,6 +258,7 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( * @param conf conf configuration object * @param S3 client builder type * @param S3 client type + * @throws IllegalArgumentException if endpoint is set when FIPS is enabled. */ private , ClientT> void configureEndpointAndRegion( BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { @@ -263,7 +274,18 @@ private , ClientT> void region = Region.of(configuredRegion); } + // FIPs? Log it, then reject any attempt to set an endpoint + final boolean fipsEnabled = parameters.isFipsEnabled(); + if (fipsEnabled) { + LOG.debug("Enabling FIPS mode"); + } + // always setting it guarantees the value is non-null, + // which tests expect. + builder.fipsEnabled(fipsEnabled); + if (endpoint != null) { + checkArgument(!fipsEnabled, + "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint); builder.endpointOverride(endpoint); // No region was configured, try to determine it from the endpoint. 
if (region == null) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index df7d3f1fb6..1aad1ad2f8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -461,6 +461,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private boolean isMultipartCopyEnabled; + /** + * Is FIPS enabled? + */ + private boolean fipsEnabled; + /** * A cache of files that should be deleted when the FileSystem is closed * or the JVM is exited. @@ -614,6 +619,8 @@ public void initialize(URI name, Configuration originalConf) ? conf.getTrimmed(AWS_REGION) : accessPoint.getRegion(); + fipsEnabled = conf.getBoolean(FIPS_ENDPOINT, ENDPOINT_FIPS_DEFAULT); + // is this an S3Express store? s3ExpressStore = isS3ExpressStore(bucket, endpoint); @@ -1046,6 +1053,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withMultipartThreshold(multiPartThreshold) .withTransferManagerExecutor(unboundedThreadPool) .withRegion(configuredRegion) + .withFipsEnabled(fipsEnabled) .withExpressCreateSession( conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT)); @@ -5521,6 +5529,10 @@ public boolean hasPathCapability(final Path path, final String capability) case OPTIMIZED_COPY_FROM_LOCAL: return optimizedCopyFromLocal; + // probe for a fips endpoint + case FIPS_ENDPOINT: + return fipsEnabled; + default: return super.hasPathCapability(p, cap); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 305bcbb565..404a255528 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -176,6 +176,11 @@ final class S3ClientCreationParameters { */ private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT; + /** + * Is FIPS enabled? + */ + private boolean fipsEnabled; + /** * List of execution interceptors to include in the chain * of interceptors in the SDK. @@ -461,5 +466,23 @@ public String toString() { ", expressCreateSession=" + expressCreateSession + '}'; } + + /** + * Get the FIPS flag. + * @return is fips enabled + */ + public boolean isFipsEnabled() { + return fipsEnabled; + } + + /** + * Set builder value. 
+ * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withFipsEnabled(final boolean value) { + fipsEnabled = value; + return this; + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 1148f6fcd4..8ebf8c013d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -38,6 +38,7 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS; import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2; @@ -272,6 +273,7 @@ private InternalConstants() { FS_CHECKSUMS, FS_MULTIPART_UPLOADER, DIRECTORY_LISTING_INCONSISTENT, + FIPS_ENDPOINT, // s3 specific STORE_CAPABILITY_AWS_V2, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 41251d190c..26b6acda30 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -357,12 +357,11 @@ public static class BucketInfo extends S3GuardTool { public static final String NAME = BUCKET_INFO; public static final String GUARDED_FLAG = "guarded"; public static final String UNGUARDED_FLAG = "unguarded"; - public static final String AUTH_FLAG = "auth"; - public static final String NONAUTH_FLAG = "nonauth"; public static final String ENCRYPTION_FLAG = "encryption"; public static final String MAGIC_FLAG = "magic"; public static final String MARKERS_FLAG = "markers"; public static final String MARKERS_AWARE = "aware"; + public static final String FIPS_FLAG = "fips"; public static final String PURPOSE = "provide/check information" + " about a specific bucket"; @@ -370,8 +369,7 @@ public static class BucketInfo extends S3GuardTool { private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" + "\t" + PURPOSE + "\n\n" + "Common options:\n" - + " -" + AUTH_FLAG + " - Require the S3Guard mode to be \"authoritative\"\n" - + " -" + NONAUTH_FLAG + " - Require the S3Guard mode to be \"non-authoritative\"\n" + + " -" + FIPS_FLAG + " - Require the client is using a FIPS endpoint\n" + " -" + MAGIC_FLAG + " - Require the S3 filesystem to be support the \"magic\" committer\n" + " -" + ENCRYPTION_FLAG @@ -395,7 +393,7 @@ public static class BucketInfo extends S3GuardTool { + " directory markers are not deleted"; public BucketInfo(Configuration conf) { - super(conf, GUARDED_FLAG, UNGUARDED_FLAG, AUTH_FLAG, NONAUTH_FLAG, MAGIC_FLAG); + super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG); CommandFormat format = getCommandFormat(); format.addOptionWithValue(ENCRYPTION_FLAG); format.addOptionWithValue(MARKERS_FLAG); @@ -462,6 +460,10 @@ public int run(String[] args, PrintStream out) println(out, "\tEndpoint: %s=%s", ENDPOINT, 
StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)"); + String region = conf.getTrimmed(AWS_REGION, ""); + println(out, "\tRegion: %s=%s", AWS_REGION, + StringUtils.isNotEmpty(region) ? region : "(unset)"); + String encryption = printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM, "none"); @@ -487,12 +489,12 @@ public int run(String[] args, PrintStream out) FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); switch (committer) { case COMMITTER_NAME_FILE: - println(out, "The original 'file' commmitter is active" + println(out, "The original 'file' committer is active" + " -this is slow and potentially unsafe"); break; case InternalCommitterConstants.COMMITTER_NAME_STAGING: println(out, "The 'staging' committer is used " - + "-prefer the 'directory' committer"); + + "-prefer the 'magic' committer"); // fall through case COMMITTER_NAME_DIRECTORY: // fall through @@ -555,13 +557,17 @@ public int run(String[] args, PrintStream out) processMarkerOption(out, fs, getCommandFormat().getOptValue(MARKERS_FLAG)); - // and check for capabilitities + // and check for capabilities println(out, "%nStore Capabilities"); for (String capability : S3A_DYNAMIC_CAPABILITIES) { out.printf("\t%s %s%n", capability, fs.hasPathCapability(root, capability)); } println(out, ""); + + if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, FIPS_ENDPOINT)) { + throw badState("FIPS endpoint was required but the filesystem is not using it"); + } // and finally flush the output and report a success. out.flush(); return SUCCESS; diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md new file mode 100644 index 0000000000..600e1e128a --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -0,0 +1,477 @@ + + +# Connecting to an Amazon S3 Bucket through the S3A Connector + + + + +1. This document covers how to connect to and authenticate with S3 stores, primarily AWS S3. +2. There have been changes in this mechanism between the V1 and V2 SDK, in particular specifying +the region is now preferred to specifying the regional S3 endpoint. +3. For connecting to third-party stores, please read [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document. + +## Foundational Concepts + +### AWS Regions and Availability Zones + +AWS provides storage, compute and other services around the world, in *regions*. + +Data in S3 is stored *buckets*; each bucket is a single region. + +There are some "special" regions: China, AWS GovCloud. +It is *believed* that the S3A connector works in these places, at least to the extent that nobody has complained about it not working. + +### Endpoints + +The S3A connector connects to Amazon S3 storage over HTTPS connections, either directly or through an HTTP proxy. +HTTP HEAD and GET, PUT, POST and DELETE requests are invoked to perform different read/write operations against the store. + +There are multiple ways to connect to an S3 bucket + +* To an [S3 Endpoint](https://docs.aws.amazon.com/general/latest/gr/s3.html); an HTTPS server hosted by amazon or a third party. +* To a FIPS-compliant S3 Endpoint. +* To an AWS S3 [Access Point](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points.html). +* Through a VPC connection, [AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html). +* AWS [Outposts](https://aws.amazon.com/outposts/). 
+ +The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket. + +Not supported: +* AWS [Snowball](https://aws.amazon.com/snowball/). + +As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client is does this through +the Apache [HttpClient library](https://hc.apache.org/index.html). + +### Third party stores + +Third-party stores implementing the S3 API are also supported. +These often only implement a subset of the S3 API; not all features are available. +If TLS authentication is used, then the HTTPS certificates for the private stores +_MUST_ be installed on the JVMs on hosts within the Hadoop cluster. + +See [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document. + + +## Connection Settings + +There are three core settings to connect to an S3 store, endpoint, region and whether or not to use path style access. + + +```xml + + fs.s3a.endpoint + AWS S3 endpoint to connect to. An up-to-date list is + provided in the AWS Documentation: regions and endpoints. Without this + property, the standard region (s3.amazonaws.com) is assumed. + + + + + fs.s3a.endpoint.region + REGION + AWS Region of the data + + + + fs.s3a.path.style.access + false + Enable S3 path style access by disabling the default virtual hosting behaviour. + Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party stores. + Default: false. + + +``` + +Historically the S3A connector has preferred the endpoint as defined by the option `fs.s3a.endpoint`. +With the move to the AWS V2 SDK, there is more emphasis on the region, set by the `fs.s3a.endpoint.region` option. + +Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store. + +### Network timeouts + +See [Timeouts](performance.html#timeouts). + +### Low-level Network Options + +```xml + + + fs.s3a.connection.maximum + 200 + Controls the maximum number of simultaneous connections to S3. + This must be bigger than the value of fs.s3a.threads.max so as to stop + threads being blocked waiting for new HTTPS connections. + + + + + fs.s3a.connection.ssl.enabled + true + + Enables or disables SSL connections to AWS services. + + + + + fs.s3a.ssl.channel.mode + Default_JSSE + + TLS implementation and cipher options. + Values: OpenSSL, Default, Default_JSSE, Default_JSSE_with_GCM + + Default_JSSE is not truly the the default JSSE implementation because + the GCM cipher is disabled when running on Java 8. However, the name + was not changed in order to preserve backwards compatibility. Instead, + new mode called Default_JSSE_with_GCM delegates to the default JSSE + implementation with no changes to the list of enabled ciphers. + + OpenSSL requires the wildfly JAR on the classpath and a compatible installation of the openssl binaries. + It is often faster than the JVM libraries, but also trickier to + use. + + + + + fs.s3a.socket.send.buffer + 8192 + + Socket send buffer hint to amazon connector. Represented in bytes. + + + + + fs.s3a.socket.recv.buffer + 8192 + + Socket receive buffer hint to amazon connector. Represented in bytes. + + +``` + +### Proxy Settings + +Connections to S3A stores can be made through an HTTP or HTTPS proxy. 
+ +```xml + + fs.s3a.proxy.host + + Hostname of the (optional) proxy server for S3 connections. + + + + + fs.s3a.proxy.ssl.enabled + false + + Does the proxy use a TLS connection? + + + + + fs.s3a.proxy.port + + Proxy server port. If this property is not set + but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with + the value of fs.s3a.connection.ssl.enabled). + + + + + fs.s3a.proxy.username + Username for authenticating with proxy server. + + + + fs.s3a.proxy.password + Password for authenticating with proxy server. + + + + fs.s3a.proxy.domain + Domain for authenticating with proxy server. + + + + fs.s3a.proxy.workstation + Workstation for authenticating with proxy server. + +``` + +Sometimes the proxy can be source of problems, especially if HTTP connections are kept +in the connection pool for some time. +Experiment with the values of `fs.s3a.connection.ttl` and `fs.s3a.connection.request.timeout` +if long-lived connections have problems. + + +## Using Per-Bucket Configuration to access data round the world + +S3 Buckets are hosted in different "regions", the default being "US-East-1". +The S3A client talks to this region by default, issuing HTTP requests +to the server `s3.amazonaws.com`. + +S3A can work with buckets from any region. Each region has its own +S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). + +1. Applications running in EC2 infrastructure do not pay for IO to/from +*local S3 buckets*. They will be billed for access to remote buckets. Always +use local buckets and local copies of data, wherever possible. +2. With the V4 signing protocol, AWS requires the explicit region endpoint +to be used —hence S3A must be configured to use the specific endpoint. This +is done in the configuration option `fs.s3a.endpoint`. +3. All endpoints other than the default endpoint only support interaction +with buckets local to that S3 instance. +4. Standard S3 buckets support "cross-region" access where use of the original `us-east-1` + endpoint allows access to the data, but newer storage types, particularly S3 Express are + not supported. + + + +If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error, +or as a 400 Bad Request: take these as cues to check the endpoint setting of +a bucket. + +The up to date list of regions is [Available online](https://docs.aws.amazon.com/general/latest/gr/s3.html). + +This list can be used to specify the endpoint of individual buckets, for example +for buckets in the central and EU/Ireland endpoints. + +```xml + + fs.s3a.bucket.landsat-pds.endpoint + s3-us-west-2.amazonaws.com + + + + fs.s3a.bucket.eu-dataset.endpoint + s3.eu-west-1.amazonaws.com + +``` + +Declaring the region for the data is simpler, as it avoid having to look up the full URL and having to worry about historical quirks of regional endpoint hostnames. + +```xml + + fs.s3a.bucket.landsat-pds.endpoint.region + us-west-2 + The endpoint for s3a://landsat-pds URLs + + + + fs.s3a.bucket.eu-dataset.endpoint.region + eu-west-1 + +``` + + +## AWS PrivateLink + +[AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html) allows for a private connection to a bucket to be defined, with network access rules managing how a bucket can be accessed. + + +1. Follow the documentation to create the private link +2. retrieve the DNS name from the console, such as `vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com` +3. 
Convert this to an endpoint URL by prefixing "https://bucket."
4. Declare this as the bucket endpoint and switch to path-style access.
5. Declare the region: there is no automated determination of the region from
   the `vpce` URL.

```xml
<property>
  <name>fs.s3a.bucket.example-usw2.endpoint</name>
  <value>https://bucket.vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com/</value>
</property>

<property>
  <name>fs.s3a.bucket.example-usw2.path.style.access</name>
  <value>true</value>
</property>

<property>
  <name>fs.s3a.bucket.example-usw2.endpoint.region</name>
  <value>us-west-2</value>
</property>
```

## Federal Information Processing Standards (FIPS) Endpoints

It is possible to use [FIPS-compliant](https://www.nist.gov/itl/fips-general-information) endpoints which
support a restricted subset of TLS algorithms.

Amazon provide a specific set of [FIPS endpoints](https://aws.amazon.com/compliance/fips/)
to use so callers can be confident that the network communication is compliant with the standard:
non-compliant algorithms are unavailable.

The boolean option `fs.s3a.endpoint.fips` (default `false`) switches the S3A connector to using the FIPS endpoint of a region.

```xml
<property>
  <name>fs.s3a.endpoint.fips</name>
  <value>true</value>
  <description>Use the FIPS endpoint</description>
</property>
```

For a single bucket:
```xml
<property>
  <name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
  <value>true</value>
  <description>Use the FIPS endpoint for the landsat dataset</description>
</property>
```

If this option is `true`, the endpoint option `fs.s3a.endpoint` MUST NOT be set:

```
A custom endpoint cannot be combined with FIPS: https://s3.eu-west-2.amazonaws.com
```

The SDK calculates the FIPS-specific endpoint without any awareness as to whether FIPS is supported by a region.
The first attempt to interact with the service will fail:

```
java.net.UnknownHostException: software.amazon.awssdk.core.exception.SdkClientException:
Received an UnknownHostException when attempting to interact with a service.
    See cause for the exact endpoint that is failing to resolve.
    If this is happening on an endpoint that previously worked,
    there may be a network connectivity issue or your DNS cache
    could be storing endpoints for too long.:
    example-london-1.s3-fips.eu-west-2.amazonaws.com
```

*Important* OpenSSL and FIPS endpoints

Linux distributions with a FIPS-compliant SSL library may not be compatible with wildfly.
Always use the JDK SSL implementation unless you are confident that the library
is compatible, or wish to experiment with the settings outside of production deployments.

```xml
<property>
  <name>fs.s3a.ssl.channel.mode</name>
  <value>Default_JSSE</value>
</property>
```

## Configuring S3 AccessPoints usage with S3A

S3A supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage, which
improves VPC integration with S3 and simplifies your data's permission model because different
policies can now be applied at the Access Point level. For more information about why to use and
how to create them, make sure to read the official documentation.

Accessing data through an access point is done by using its ARN, as opposed to just the bucket name.
You can set the Access Point ARN property using the following per-bucket configuration property:

```xml
<property>
  <name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
  <value>{ACCESSPOINT_ARN_HERE}</value>
  <description>Configure S3a traffic to use this AccessPoint</description>
</property>
```

This configures access to the `sample-bucket` bucket for S3A, to go through the
new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your
configured ARN when getting data from S3 instead of your bucket.
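This is transparent to application code. The following is a minimal, hypothetical sketch (the class name is invented; the bucket and key come from the example above, and it assumes valid credentials and the per-bucket ARN setting shown earlier): the client keeps addressing the bucket by name while the connector resolves the configured Access Point, and it also probes the `fs.s3a.endpoint.fips` path capability added in this patch.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AccessPointReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The application still refers to the bucket by name; if
    // fs.s3a.bucket.sample-bucket.accesspoint.arn is set, the S3A connector
    // routes the request through the configured Access Point.
    Path path = new Path("s3a://sample-bucket/key");
    try (FileSystem fs = path.getFileSystem(conf)) {
      // Probe the path capability added by this change: is the client
      // configured to talk to a FIPS endpoint?
      boolean fips = fs.hasPathCapability(path, "fs.s3a.endpoint.fips");
      System.out.println("FIPS endpoint in use: " + fips);
      try (FSDataInputStream in = fs.open(path)) {
        System.out.println("first byte: " + in.read());
      }
    }
  }
}
```

Closing the `FileSystem` instance is only appropriate in a short-lived tool like this sketch; long-running services normally rely on the shared FileSystem cache.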
_the name of the bucket used in the s3a:// URLs is irrelevant; it is not used when connecting with the store_

Example

```xml
<property>
  <name>fs.s3a.bucket.example-ap.accesspoint.arn</name>
  <value>arn:aws:s3:eu-west-2:152813717728:accesspoint/ap-example-london</value>
  <description>AccessPoint bound to bucket name example-ap</description>
</property>
```

The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access
Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access
to known sources of data defined through Access Points. If there is a need to access a bucket
directly (without Access Points), you can use per-bucket overrides to disable this setting on a
bucket-by-bucket basis, i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.

```xml
<property>
  <name>fs.s3a.accesspoint.required</name>
  <value>true</value>
</property>

<property>
  <name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
  <value>false</value>
</property>
```

Before using Access Points, make sure you're not impacted by the following:
- `ListObjectsV1` is not supported; this is also deprecated on AWS S3 for performance reasons;
- The endpoint for S3 requests will automatically change to use
`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While
considering endpoints, if you have any custom signers that use the host endpoint property, make
sure to update them if needed.

## Debugging network problems

The `storediag` command within the utility [cloudstore](https://github.com/exampleoughran/cloudstore)
JAR is recommended as the way to view and print settings.

If `storediag` doesn't connect to your S3 store, *nothing else will*.

## Common Sources of Connection Problems

Based on the experience of people who field support calls, here are
some of the main connectivity issues which cause problems.

### Inconsistent configuration across a cluster

All hosts in the cluster need to have the configuration secrets;
local environment variables are not enough.

If HTTPS/TLS is used for a private store, the relevant certificates MUST be installed everywhere.

For applications such as distcp, the options need to be passed with the job.

### Confusion between public/private S3 Stores

If your cluster is configured to use a private store, AWS-hosted buckets are not visible.
If you wish to read data in a private store, you need to change the endpoint.

Private S3 stores generally expect path-style access.

### Region and endpoints misconfigured

These usually surface rapidly and with meaningful messages.

Region errors generally surface as:
* `UnknownHostException`
* `AWSRedirectException` "Received permanent redirect response to region"

Endpoint configuration problems can be more varied, as they are just HTTPS URLs.

### Wildfly/OpenSSL Brittleness

When it works, it is fast. But it is fussy about OpenSSL implementations, TLS protocols and more.
Because it uses the native openssl binaries, operating system updates can trigger regressions.

Disabling it should be the first step in troubleshooting any TLS problems.

### Proxy setup

If there is a proxy, set it up correctly.
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index dcf3be2b08..0f09c7f873 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -33,6 +33,7 @@ full details.
## Documents +* [Connecting](./connecting.html) * [Encryption](./encryption.html) * [Performance](./performance.html) * [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html) @@ -223,6 +224,10 @@ Do not inadvertently share these credentials through means such as: If you do any of these: change your credentials immediately! +## Connecting to Amazon S3 or a third-party store + +See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md). + ## Authenticating with S3 Except when interacting with public S3 buckets, the S3A client @@ -835,61 +840,15 @@ Here are some the S3A properties for use in production. - fs.s3a.connection.ssl.enabled - true - Enables or disables SSL connections to AWS services. - Also sets the default port to use for the s3a proxy settings, - when not explicitly set in fs.s3a.proxy.port. - - - - fs.s3a.endpoint - AWS S3 endpoint to connect to. An up-to-date list is - provided in the AWS Documentation: regions and endpoints. Without this - property, the standard region (s3.amazonaws.com) is assumed. + fs.s3a.connection.maximum + 96 + Controls the maximum number of simultaneous connections to S3. + This must be bigger than the value of fs.s3a.threads.max so as to stop + threads being blocked waiting for new HTTPS connections. + Why not equal? The AWS SDK transfer manager also uses these connections. - - fs.s3a.path.style.access - false - Enable S3 path style access ie disabling the default virtual hosting behaviour. - Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. - - - - - fs.s3a.proxy.host - Hostname of the (optional) proxy server for S3 connections. - - - - fs.s3a.proxy.port - Proxy server port. If this property is not set - but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with - the value of fs.s3a.connection.ssl.enabled). - - - - fs.s3a.proxy.username - Username for authenticating with proxy server. - - - - fs.s3a.proxy.password - Password for authenticating with proxy server. - - - - fs.s3a.proxy.domain - Domain for authenticating with proxy server. - - - - fs.s3a.proxy.workstation - Workstation for authenticating with proxy server. - - fs.s3a.attempts.maximum 5 @@ -1005,14 +964,6 @@ Here are some the S3A properties for use in production. implementations can still be used - - fs.s3a.accesspoint.required - false - Require that all S3 access is made through Access Points and not through - buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set - of buckets. - - fs.s3a.block.size 32M @@ -1218,23 +1169,6 @@ Here are some the S3A properties for use in production. - - fs.s3a.connection.request.timeout - 0 - - Time out on HTTP requests to the AWS service; 0 means no timeout. - Measured in seconds; the usual time suffixes are all supported - - Important: this is the maximum duration of any AWS service call, - including upload and copy operations. If non-zero, it must be larger - than the time to upload multi-megabyte blocks to S3 from the client, - and to rename many-GB files. Use with care. 
- - Values that are larger than Integer.MAX_VALUE milliseconds are - converged to Integer.MAX_VALUE milliseconds - - - fs.s3a.etag.checksum.enabled false @@ -1699,179 +1633,6 @@ For a site configuration of: The bucket "nightly" will be encrypted with SSE-KMS using the KMS key `arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f` -### Using Per-Bucket Configuration to access data round the world - -S3 Buckets are hosted in different "regions", the default being "US-East". -The S3A client talks to this region by default, issuing HTTP requests -to the server `s3.amazonaws.com`. - -S3A can work with buckets from any region. Each region has its own -S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). - -1. Applications running in EC2 infrastructure do not pay for IO to/from -*local S3 buckets*. They will be billed for access to remote buckets. Always -use local buckets and local copies of data, wherever possible. -1. The default S3 endpoint can support data IO with any bucket when the V1 request -signing protocol is used. -1. When the V4 signing protocol is used, AWS requires the explicit region endpoint -to be used —hence S3A must be configured to use the specific endpoint. This -is done in the configuration option `fs.s3a.endpoint`. -1. All endpoints other than the default endpoint only support interaction -with buckets local to that S3 instance. - -While it is generally simpler to use the default endpoint, working with -V4-signing-only regions (Frankfurt, Seoul) requires the endpoint to be identified. -Expect better performance from direct connections —traceroute will give you some insight. - -If the wrong endpoint is used, the request may fail. This may be reported as a 301/redirect error, -or as a 400 Bad Request: take these as cues to check the endpoint setting of -a bucket. - -Here is a list of properties defining all AWS S3 regions, current as of June 2017: - -```xml - - - central.endpoint - s3.amazonaws.com - - - - canada.endpoint - s3.ca-central-1.amazonaws.com - - - - frankfurt.endpoint - s3.eu-central-1.amazonaws.com - - - - ireland.endpoint - s3-eu-west-1.amazonaws.com - - - - london.endpoint - s3.eu-west-2.amazonaws.com - - - - mumbai.endpoint - s3.ap-south-1.amazonaws.com - - - - ohio.endpoint - s3.us-east-2.amazonaws.com - - - - oregon.endpoint - s3-us-west-2.amazonaws.com - - - - sao-paolo.endpoint - s3-sa-east-1.amazonaws.com - - - - seoul.endpoint - s3.ap-northeast-2.amazonaws.com - - - - singapore.endpoint - s3-ap-southeast-1.amazonaws.com - - - - sydney.endpoint - s3-ap-southeast-2.amazonaws.com - - - - tokyo.endpoint - s3-ap-northeast-1.amazonaws.com - - - - virginia.endpoint - ${central.endpoint} - -``` - -This list can be used to specify the endpoint of individual buckets, for example -for buckets in the central and EU/Ireland endpoints. - -```xml - - fs.s3a.bucket.landsat-pds.endpoint - ${central.endpoint} - The endpoint for s3a://landsat-pds URLs - - - - fs.s3a.bucket.eu-dataset.endpoint - ${ireland.endpoint} - The endpoint for s3a://eu-dataset URLs - -``` - -Why explicitly declare a bucket bound to the central endpoint? It ensures -that if the default endpoint is changed to a new region, data store in -US-east is still reachable. 
- -## Configuring S3 AccessPoints usage with S3A -S3a now supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which -improves VPC integration with S3 and simplifies your data's permission model because different -policies can be applied now on the Access Point level. For more information about why to use and -how to create them make sure to read the official documentation. - -Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name. -You can set the Access Point ARN property using the following per bucket configuration property: -```xml - - fs.s3a.bucket.sample-bucket.accesspoint.arn - {ACCESSPOINT_ARN_HERE} - Configure S3a traffic to use this AccessPoint - -``` - -This configures access to the `sample-bucket` bucket for S3A, to go through the -new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your -configured ARN when getting data from S3 instead of your bucket. - -The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access -Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access -to known sources of data defined through Access Points. In case there is a need to access a bucket -directly (without Access Points) then you can use per bucket overrides to disable this setting on a -bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`. - -```xml - - - fs.s3a.accesspoint.required - true - - - - fs.s3a.bucket.example-bucket.accesspoint.required - false - -``` - -Before using Access Points make sure you're not impacted by the following: -- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons; -- The endpoint for S3 requests will automatically change from `s3.amazonaws.com` to use -`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While -considering endpoints, if you have any custom signers that use the host endpoint property make -sure to update them if needed; - ## Requester Pays buckets S3A supports buckets with diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 37cf472277..4d506b6bfc 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -218,6 +218,7 @@ everything uses the same HTTP connection pool. | `fs.s3a.executor.capacity` | `16` | Maximum threads for any single operation | | `fs.s3a.max.total.tasks` | `16` | Extra tasks which can be queued excluding prefetching operations | +### Timeouts. Network timeout options can be tuned to make the client fail faster *or* retry more. The choice is yours. Generally recovery is better, but sometimes fail-fast is more useful. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index c5e807c964..af4c6a76be 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -132,20 +132,17 @@ This auditing information can be used to identify opportunities to reduce load. Prints and optionally checks the status of a bucket. 
```bash -hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET +hadoop s3guard bucket-info [-fips] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET ``` Options -| argument | meaning | -|-----------|-------------| -| `-guarded` | Require S3Guard to be enabled. This will now always fail | -| `-unguarded` | Require S3Guard to be disabled. This will now always succeed | -| `-auth` | Require the S3Guard mode to be "authoritative". This will now always fail | -| `-nonauth` | Require the S3Guard mode to be "non-authoritative". This will now always fail | -| `-magic` | Require the S3 filesystem to be support the "magic" committer | -| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` | -| `-encryption ` | Require a specific encryption algorithm | +| argument | meaning | +|----------------------|---------------------------------------------------------------------| +| `-fips` | Require FIPS endopint to be in use | +| `-magic` | Require the S3 filesystem to be support the "magic" committer | +| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` | +| `-encryption ` | Require a specific encryption algorithm | The server side encryption options are not directly related to S3Guard, but it is often convenient to check them at the same time. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index ded2f0b885..ce6d8a7e1e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -39,6 +39,7 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; @@ -138,6 +139,7 @@ private Configuration createConfigurationWithProbe(final int probe) { removeBaseAndBucketOverrides(conf, S3A_BUCKET_PROBE, ENDPOINT, + FIPS_ENDPOINT, AWS_REGION, PATH_STYLE_ACCESS); conf.setInt(S3A_BUCKET_PROBE, probe); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 5d10590dfe..5e6991128b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import java.net.UnknownHostException; import java.nio.file.AccessDeniedException; import java.util.ArrayList; @@ -36,16 +35,17 @@ import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; +import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.io.IOUtils.closeStream; -import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; - import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -82,6 +82,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; + /** * New FS instance which will be closed in teardown. */ @@ -134,10 +136,9 @@ public void testEndpointOverride() throws Throwable { describe("Create a client with a configured endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2); + S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -145,10 +146,9 @@ public void testCentralEndpoint() throws Throwable { describe("Create a client with the central endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -156,21 +156,40 @@ public void testWithRegionConfig() throws Throwable { describe("Create a client with a configured region"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2); + S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } + @Test + public void testWithFips() throws Throwable { + describe("Create a client with fips enabled"); + S3Client client = createS3Client(getConfiguration(), + null, EU_WEST_2, EU_WEST_2, true); + expectInterceptorException(client); + } + + /** + * Attempting to create a client with fips enabled and an endpoint specified + * fails during client construction. 
+ */ + @Test + public void testWithFipsAndEndpoint() throws Throwable { + describe("Create a client with fips and an endpoint"); + + intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () -> + createS3Client(getConfiguration(), CENTRAL_ENDPOINT, null, US_EAST_1, true)); + } + + @Test public void testEUWest2Endpoint() throws Throwable { describe("Create a client with the eu west 2 endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2); + S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -178,10 +197,9 @@ public void testWithRegionAndEndpointConfig() throws Throwable { describe("Test that when both region and endpoint are configured, region takes precedence"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2); + S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -189,21 +207,43 @@ public void testWithChinaEndpoint() throws Throwable { describe("Test with a china endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1); + S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); + } + + /** + * Expect an exception to be thrown by the interceptor with the message + * {@link #EXCEPTION_THROWN_BY_INTERCEPTOR}. + * @param client client to issue a head request against. + * @return the expected exception. + * @throws Exception any other exception. + */ + private AwsServiceException expectInterceptorException(final S3Client client) + throws Exception { + + return intercept(AwsServiceException.class, EXCEPTION_THROWN_BY_INTERCEPTOR, + () -> head(client)); + } + + /** + * Issue a head request against the bucket. + * @param client client to use + * @return the response. 
+ */ + private HeadBucketResponse head(final S3Client client) { + return client.headBucket( + HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()); } @Test public void testWithGovCloudEndpoint() throws Throwable { - describe("Test with a gov cloud endpoint"); + describe("Test with a gov cloud endpoint; enable fips"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1); + S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -212,19 +252,20 @@ public void testWithVPCE() throws Throwable { describe("Test with vpc endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2); + S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } - class RegionInterceptor implements ExecutionInterceptor { - private String endpoint; - private String region; + private final class RegionInterceptor implements ExecutionInterceptor { + private final String endpoint; + private final String region; + private final boolean isFips; - RegionInterceptor(String endpoint, String region) { + RegionInterceptor(String endpoint, String region, final boolean isFips) { this.endpoint = endpoint; this.region = region; + this.isFips = isFips; } @Override @@ -249,8 +290,15 @@ public void beforeExecution(Context.BeforeExecution context, executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString()) .describedAs("Incorrect region set").isEqualTo(region); + // verify the fips state matches expectation. + Assertions.assertThat(executionAttributes.getAttribute( + AwsExecutionAttribute.FIPS_ENDPOINT_ENABLED)) + .describedAs("Incorrect FIPS flag set in execution attributes") + .isNotNull() + .isEqualTo(isFips); + // We don't actually want to make a request, so exit early. - throw AwsServiceException.builder().message("Exception thrown by interceptor").build(); + throw AwsServiceException.builder().message(EXCEPTION_THROWN_BY_INTERCEPTOR).build(); } } @@ -261,17 +309,17 @@ public void beforeExecution(Context.BeforeExecution context, * @param conf configuration to use. * @param endpoint endpoint. * @param expectedRegion the region that should be set in the client. + * @param isFips is this a FIPS endpoint? * @return the client. - * @throws URISyntaxException parse problems. 
* @throws IOException IO problems */ @SuppressWarnings("deprecation") private S3Client createS3Client(Configuration conf, - String endpoint, String configuredRegion, String expectedRegion) + String endpoint, String configuredRegion, String expectedRegion, boolean isFips) throws IOException { List interceptors = new ArrayList<>(); - interceptors.add(new RegionInterceptor(endpoint, expectedRegion)); + interceptors.add(new RegionInterceptor(endpoint, expectedRegion, isFips)); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); @@ -283,8 +331,8 @@ private S3Client createS3Client(Configuration conf, .withMetrics(new EmptyS3AStatisticsContext() .newStatisticsFromAwsSdk()) .withExecutionInterceptors(interceptors) - .withRegion(configuredRegion); - + .withRegion(configuredRegion) + .withFipsEnabled(isFips); S3Client client = factory.createS3Client( getFileSystem().getUri(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java index 28bc2a246a..08696ae62d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java @@ -33,6 +33,8 @@ import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads; @@ -97,6 +99,20 @@ public void testStoreInfo() throws Throwable { LOG.info("Exec output=\n{}", output); } + @Test + public void testStoreInfoFips() throws Throwable { + final S3AFileSystem fs = getFileSystem(); + if (!fs.hasPathCapability(new Path("/"), FIPS_ENDPOINT)) { + skip("FIPS not enabled"); + } + S3GuardTool.BucketInfo cmd = + toClose(new S3GuardTool.BucketInfo(fs.getConf())); + String output = exec(cmd, cmd.getName(), + "-" + BucketInfo.FIPS_FLAG, + fs.getUri().toString()); + LOG.info("Exec output=\n{}", output); + } + private final static String UPLOAD_NAME = "test-upload"; @Test diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml index f871369ed5..c99d7d4313 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -56,6 +56,12 @@ Do not add the referrer header to landsat operations + + fs.s3a.bucket.landsat-pds.endpoint.fips + true + Use the fips endpoint + + fs.s3a.bucket.usgs-landsat.endpoint.region