HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints (#6277)
Adds a new option `fs.s3a.endpoint.fips` to switch the SDK client to use FIPS endpoints, as an alternative to explicitly declaring them.

* The option is available as a path capability for probes.
* SDK v2 itself doesn't know that some regions don't have FIPS endpoints.
* The SDK only fails on an endpoint + FIPS flag combination with a retried exception; with this change the S3A client fails fast.
* Adds a new "connecting.md" doc; moves existing docs there and restructures.
* New tests in ITestS3AEndpointRegion.

bucket-info command support:
* added to list of path capabilities
* added -fips flag and test for explicit probe
* also now prints bucket region
* removed some of the obsolete s3guard options
* updated docs

Contributed by Steve Loughran
parent 36198b5edf, commit d378853790
@@ -1335,6 +1335,15 @@ private Constants() {
   */
  public static final String AWS_S3_DEFAULT_REGION = "us-east-2";

  /**
   * Is the endpoint a FIPS endpoint?
   * Can be queried as a path capability.
   * Value {@value}.
   */
  public static final String FIPS_ENDPOINT = "fs.s3a.endpoint.fips";

  public static final boolean ENDPOINT_FIPS_DEFAULT = false;

  /**
   * Require that all S3 access is made through Access Points.
   */
@@ -22,6 +22,7 @@
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,6 +55,7 @@
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT;
@@ -63,6 +65,7 @@
import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner;
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4;
import static org.apache.hadoop.util.Preconditions.checkArgument;

/**
@@ -102,6 +105,13 @@ public class DefaultS3ClientFactory extends Configured
  /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */
  private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG);

  /**
   * Error message when an endpoint is set with FIPS enabled: {@value}.
   */
  @VisibleForTesting
  public static final String ERROR_ENDPOINT_WITH_FIPS =
      "An endpoint cannot be set when " + FIPS_ENDPOINT + " is true";

  @Override
  public S3Client createS3Client(
      final URI uri,
@@ -248,6 +258,7 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration(
   * @param conf conf configuration object
   * @param <BuilderT> S3 client builder type
   * @param <ClientT> S3 client type
   * @throws IllegalArgumentException if endpoint is set when FIPS is enabled.
   */
  private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion(
      BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) {
@@ -263,7 +274,18 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
      region = Region.of(configuredRegion);
    }

    // FIPS? Log it, then reject any attempt to set an endpoint
    final boolean fipsEnabled = parameters.isFipsEnabled();
    if (fipsEnabled) {
      LOG.debug("Enabling FIPS mode");
    }
    // always setting it guarantees the value is non-null,
    // which tests expect.
    builder.fipsEnabled(fipsEnabled);

    if (endpoint != null) {
      checkArgument(!fipsEnabled,
          "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
      builder.endpointOverride(endpoint);
      // No region was configured, try to determine it from the endpoint.
      if (region == null) {
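Taken together, the check above means that a configuration such as the following (bucket name hypothetical) is now rejected at client-creation time with `ERROR_ENDPOINT_WITH_FIPS`, rather than surfacing later as a retried SDK failure:

```xml
<!-- hypothetical per-bucket settings: an endpoint plus the FIPS flag now fails fast -->
<property>
  <name>fs.s3a.bucket.example-london.endpoint</name>
  <value>https://s3.eu-west-2.amazonaws.com</value>
</property>
<property>
  <name>fs.s3a.bucket.example-london.endpoint.fips</name>
  <value>true</value>
</property>
```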
@@ -461,6 +461,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
   */
  private boolean isMultipartCopyEnabled;

  /**
   * Is FIPS enabled?
   */
  private boolean fipsEnabled;

  /**
   * A cache of files that should be deleted when the FileSystem is closed
   * or the JVM is exited.
@@ -614,6 +619,8 @@ public void initialize(URI name, Configuration originalConf)
        ? conf.getTrimmed(AWS_REGION)
        : accessPoint.getRegion();

    fipsEnabled = conf.getBoolean(FIPS_ENDPOINT, ENDPOINT_FIPS_DEFAULT);

    // is this an S3Express store?
    s3ExpressStore = isS3ExpressStore(bucket, endpoint);

@@ -1046,6 +1053,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
        .withMultipartThreshold(multiPartThreshold)
        .withTransferManagerExecutor(unboundedThreadPool)
        .withRegion(configuredRegion)
        .withFipsEnabled(fipsEnabled)
        .withExpressCreateSession(
            conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT));

@@ -5521,6 +5529,10 @@ public boolean hasPathCapability(final Path path, final String capability)
    case OPTIMIZED_COPY_FROM_LOCAL:
      return optimizedCopyFromLocal;

    // probe for a fips endpoint
    case FIPS_ENDPOINT:
      return fipsEnabled;

    default:
      return super.hasPathCapability(p, cap);
    }
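With the constant and the switch case above in place, any client can probe a live filesystem for FIPS use through the public `hasPathCapability()` API. A minimal sketch (the bucket name is hypothetical):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FipsProbe {
  public static void main(String[] args) throws Exception {
    // hypothetical bucket; substitute your own
    Path root = new Path("s3a://example-bucket/");
    try (FileSystem fs = root.getFileSystem(new Configuration())) {
      // true iff fs.s3a.endpoint.fips was enabled for this bucket
      System.out.println("FIPS endpoint in use: "
          + fs.hasPathCapability(root, "fs.s3a.endpoint.fips"));
    }
  }
}
```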
@@ -176,6 +176,11 @@ final class S3ClientCreationParameters {
   */
  private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT;

  /**
   * Is FIPS enabled?
   */
  private boolean fipsEnabled;

  /**
   * List of execution interceptors to include in the chain
   * of interceptors in the SDK.
@@ -461,5 +466,23 @@ public String toString() {
          ", expressCreateSession=" + expressCreateSession +
          '}';
    }

    /**
     * Get the FIPS flag.
     * @return is fips enabled
     */
    public boolean isFipsEnabled() {
      return fipsEnabled;
    }

    /**
     * Set builder value.
     * @param value new value
     * @return the builder
     */
    public S3ClientCreationParameters withFipsEnabled(final boolean value) {
      fipsEnabled = value;
      return this;
    }
  }
}
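For orientation, a sketch of how a caller of this internal builder-style API would now carry the flag through to client creation (region value illustrative):

```java
// internal API: parameters handed to the S3 client factory
S3ClientFactory.S3ClientCreationParameters parameters =
    new S3ClientFactory.S3ClientCreationParameters()
        .withRegion("us-gov-east-1")  // region whose FIPS endpoint is wanted
        .withFipsEnabled(true);       // switches the SDK builder to FIPS
```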
@@ -38,6 +38,7 @@
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS;
import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS;
import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2;
@@ -272,6 +273,7 @@ private InternalConstants() {
      FS_CHECKSUMS,
      FS_MULTIPART_UPLOADER,
      DIRECTORY_LISTING_INCONSISTENT,
      FIPS_ENDPOINT,

      // s3 specific
      STORE_CAPABILITY_AWS_V2,
@@ -357,12 +357,11 @@ public static class BucketInfo extends S3GuardTool {
     public static final String NAME = BUCKET_INFO;
     public static final String GUARDED_FLAG = "guarded";
     public static final String UNGUARDED_FLAG = "unguarded";
-    public static final String AUTH_FLAG = "auth";
-    public static final String NONAUTH_FLAG = "nonauth";
     public static final String ENCRYPTION_FLAG = "encryption";
     public static final String MAGIC_FLAG = "magic";
     public static final String MARKERS_FLAG = "markers";
     public static final String MARKERS_AWARE = "aware";
+    public static final String FIPS_FLAG = "fips";

     public static final String PURPOSE = "provide/check information"
         + " about a specific bucket";
@@ -370,8 +369,7 @@ public static class BucketInfo extends S3GuardTool {
     private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n"
         + "\t" + PURPOSE + "\n\n"
         + "Common options:\n"
-        + " -" + AUTH_FLAG + " - Require the S3Guard mode to be \"authoritative\"\n"
-        + " -" + NONAUTH_FLAG + " - Require the S3Guard mode to be \"non-authoritative\"\n"
+        + " -" + FIPS_FLAG + " - Require the client to be using a FIPS endpoint\n"
         + " -" + MAGIC_FLAG +
         " - Require the S3 filesystem to support the \"magic\" committer\n"
         + " -" + ENCRYPTION_FLAG
@@ -395,7 +393,7 @@ public static class BucketInfo extends S3GuardTool {
         + " directory markers are not deleted";

     public BucketInfo(Configuration conf) {
-      super(conf, GUARDED_FLAG, UNGUARDED_FLAG, AUTH_FLAG, NONAUTH_FLAG, MAGIC_FLAG);
+      super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG);
       CommandFormat format = getCommandFormat();
       format.addOptionWithValue(ENCRYPTION_FLAG);
       format.addOptionWithValue(MARKERS_FLAG);
@@ -462,6 +460,10 @@ public int run(String[] args, PrintStream out)
       println(out, "\tEndpoint: %s=%s",
           ENDPOINT,
           StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)");
+      String region = conf.getTrimmed(AWS_REGION, "");
+      println(out, "\tRegion: %s=%s", AWS_REGION,
+          StringUtils.isNotEmpty(region) ? region : "(unset)");

       String encryption =
           printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM,
               "none");
@@ -487,12 +489,12 @@ public int run(String[] args, PrintStream out)
           FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE);
       switch (committer) {
       case COMMITTER_NAME_FILE:
-        println(out, "The original 'file' commmitter is active"
+        println(out, "The original 'file' committer is active"
             + " -this is slow and potentially unsafe");
         break;
       case InternalCommitterConstants.COMMITTER_NAME_STAGING:
         println(out, "The 'staging' committer is used "
-            + "-prefer the 'directory' committer");
+            + "-prefer the 'magic' committer");
         // fall through
       case COMMITTER_NAME_DIRECTORY:
         // fall through
@@ -555,13 +557,17 @@ public int run(String[] args, PrintStream out)
       processMarkerOption(out, fs,
           getCommandFormat().getOptValue(MARKERS_FLAG));

-      // and check for capabilitities
+      // and check for capabilities
       println(out, "%nStore Capabilities");
       for (String capability : S3A_DYNAMIC_CAPABILITIES) {
         out.printf("\t%s %s%n", capability,
             fs.hasPathCapability(root, capability));
       }
       println(out, "");

+      if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, FIPS_ENDPOINT)) {
+        throw badState("FIPS endpoint was required but the filesystem is not using it");
+      }
       // and finally flush the output and report a success.
       out.flush();
       return SUCCESS;
@@ -0,0 +1,477 @@
<!---
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

# Connecting to an Amazon S3 Bucket through the S3A Connector

<!-- MACRO{toc|fromDepth=0|toDepth=2} -->

1. This document covers how to connect to and authenticate with S3 stores, primarily AWS S3.
2. There have been changes in this mechanism between the V1 and V2 SDK; in particular, specifying
   the region is now preferred to specifying the regional S3 endpoint.
3. For connecting to third-party stores, please read [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document.
## <a name="foundational"></a> Foundational Concepts
|
||||
|
||||
### <a name="regions"></a> AWS Regions and Availability Zones
|
||||
|
||||
AWS provides storage, compute and other services around the world, in *regions*.
|
||||
|
||||
Data in S3 is stored *buckets*; each bucket is a single region.
|
||||
|
||||
There are some "special" regions: China, AWS GovCloud.
|
||||
It is *believed* that the S3A connector works in these places, at least to the extent that nobody has complained about it not working.
|
||||
|
||||
### <a name="endpoints"></a> Endpoints
|
||||
|
||||
The S3A connector connects to Amazon S3 storage over HTTPS connections, either directly or through an HTTP proxy.
|
||||
HTTP HEAD and GET, PUT, POST and DELETE requests are invoked to perform different read/write operations against the store.
|
||||
|
||||
There are multiple ways to connect to an S3 bucket
|
||||
|
||||
* To an [S3 Endpoint](https://docs.aws.amazon.com/general/latest/gr/s3.html); an HTTPS server hosted by amazon or a third party.
|
||||
* To a FIPS-compliant S3 Endpoint.
|
||||
* To an AWS S3 [Access Point](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points.html).
|
||||
* Through a VPC connection, [AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html).
|
||||
* AWS [Outposts](https://aws.amazon.com/outposts/).
|
||||
|
||||
The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket.
|
||||
|
||||
Not supported:
|
||||
* AWS [Snowball](https://aws.amazon.com/snowball/).
|
||||
|
||||
As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client is does this through
|
||||
the Apache [HttpClient library](https://hc.apache.org/index.html).
|
||||
|
||||
### <a name="third-party"></a> Third party stores
|
||||
|
||||
Third-party stores implementing the S3 API are also supported.
|
||||
These often only implement a subset of the S3 API; not all features are available.
|
||||
If TLS authentication is used, then the HTTPS certificates for the private stores
|
||||
_MUST_ be installed on the JVMs on hosts within the Hadoop cluster.
|
||||
|
||||
See [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document.
|
||||
|
||||
|
||||
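For example, with a stock JDK keystore, something like the following imports a store's CA certificate on each host (the alias, certificate file and keystore path are illustrative; `changeit` is the JDK's default keystore password):

```bash
# import the private store's CA certificate into the JVM trust store
keytool -importcert \
  -alias private-s3-store \
  -file store-ca.pem \
  -keystore "$JAVA_HOME/lib/security/cacerts" \
  -storepass changeit
```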
## <a name="settings"></a> Connection Settings
|
||||
|
||||
There are three core settings to connect to an S3 store, endpoint, region and whether or not to use path style access.
|
||||
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.endpoint</name>
|
||||
<description>AWS S3 endpoint to connect to. An up-to-date list is
|
||||
provided in the AWS Documentation: regions and endpoints. Without this
|
||||
property, the standard region (s3.amazonaws.com) is assumed.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.endpoint.region</name>
|
||||
<value>REGION</value>
|
||||
<description>AWS Region of the data</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.path.style.access</name>
|
||||
<value>false</value>
|
||||
<description>Enable S3 path style access by disabling the default virtual hosting behaviour.
|
||||
Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party stores.
|
||||
Default: false.
|
||||
</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
Historically the S3A connector has preferred the endpoint as defined by the option `fs.s3a.endpoint`.
|
||||
With the move to the AWS V2 SDK, there is more emphasis on the region, set by the `fs.s3a.endpoint.region` option.
|
||||
|
||||
Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store.
|
||||
|
||||
### <a name="timeouts"></a> Network timeouts
|
||||
|
||||
See [Timeouts](performance.html#timeouts).
|
||||
|
||||
### <a name="networking"></a> Low-level Network Options
|
||||
|
||||
```xml
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.connection.maximum</name>
|
||||
<value>200</value>
|
||||
<description>Controls the maximum number of simultaneous connections to S3.
|
||||
This must be bigger than the value of fs.s3a.threads.max so as to stop
|
||||
threads being blocked waiting for new HTTPS connections.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.connection.ssl.enabled</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Enables or disables SSL connections to AWS services.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.ssl.channel.mode</name>
|
||||
<value>Default_JSSE</value>
|
||||
<description>
|
||||
TLS implementation and cipher options.
|
||||
Values: OpenSSL, Default, Default_JSSE, Default_JSSE_with_GCM
|
||||
|
||||
Default_JSSE is not truly the the default JSSE implementation because
|
||||
the GCM cipher is disabled when running on Java 8. However, the name
|
||||
was not changed in order to preserve backwards compatibility. Instead,
|
||||
new mode called Default_JSSE_with_GCM delegates to the default JSSE
|
||||
implementation with no changes to the list of enabled ciphers.
|
||||
|
||||
OpenSSL requires the wildfly JAR on the classpath and a compatible installation of the openssl binaries.
|
||||
It is often faster than the JVM libraries, but also trickier to
|
||||
use.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.socket.send.buffer</name>
|
||||
<value>8192</value>
|
||||
<description>
|
||||
Socket send buffer hint to amazon connector. Represented in bytes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.socket.recv.buffer</name>
|
||||
<value>8192</value>
|
||||
<description>
|
||||
Socket receive buffer hint to amazon connector. Represented in bytes.
|
||||
</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
### <a name="proxies"></a> Proxy Settings
|
||||
|
||||
Connections to S3A stores can be made through an HTTP or HTTPS proxy.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.proxy.host</name>
|
||||
<description>
|
||||
Hostname of the (optional) proxy server for S3 connections.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.ssl.enabled</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
Does the proxy use a TLS connection?
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.port</name>
|
||||
<description>
|
||||
Proxy server port. If this property is not set
|
||||
but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
|
||||
the value of fs.s3a.connection.ssl.enabled).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.username</name>
|
||||
<description>Username for authenticating with proxy server.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.password</name>
|
||||
<description>Password for authenticating with proxy server.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.domain</name>
|
||||
<description>Domain for authenticating with proxy server.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.proxy.workstation</name>
|
||||
<description>Workstation for authenticating with proxy server.</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
Sometimes the proxy can be source of problems, especially if HTTP connections are kept
|
||||
in the connection pool for some time.
|
||||
Experiment with the values of `fs.s3a.connection.ttl` and `fs.s3a.connection.request.timeout`
|
||||
if long-lived connections have problems.
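As a starting point, something like the following shortens connection lifetimes so that pooled connections are recycled before the proxy drops them (values are illustrative, not recommendations):

```xml
<!-- if time suffixes are unsupported in your release, use milliseconds -->
<property>
  <name>fs.s3a.connection.ttl</name>
  <value>5m</value>
</property>

<property>
  <name>fs.s3a.connection.request.timeout</name>
  <value>1m</value>
</property>
```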
## <a name="per_bucket_endpoints"></a> Using Per-Bucket Configuration to access data round the world

S3 Buckets are hosted in different "regions", the default being "US-East-1".
The S3A client talks to this region by default, issuing HTTP requests
to the server `s3.amazonaws.com`.

S3A can work with buckets from any region. Each region has its own
S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).

1. Applications running in EC2 infrastructure do not pay for IO to/from
   *local S3 buckets*. They will be billed for access to remote buckets. Always
   use local buckets and local copies of data, wherever possible.
2. With the V4 signing protocol, AWS requires the explicit region endpoint
   to be used —hence S3A must be configured to use the specific endpoint. This
   is done in the configuration option `fs.s3a.endpoint`.
3. All endpoints other than the default endpoint only support interaction
   with buckets local to that S3 instance.
4. Standard S3 buckets support "cross-region" access, where use of the original `us-east-1`
   endpoint allows access to the data, but newer storage types, particularly S3 Express, are
   not supported.

If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error,
or as a 400 Bad Request: take these as cues to check the endpoint setting of
a bucket.

The up-to-date list of regions is [available online](https://docs.aws.amazon.com/general/latest/gr/s3.html).

This list can be used to specify the endpoint of individual buckets, for example
for buckets in the central and EU/Ireland endpoints.

```xml
<property>
  <name>fs.s3a.bucket.landsat-pds.endpoint</name>
  <value>s3-us-west-2.amazonaws.com</value>
</property>

<property>
  <name>fs.s3a.bucket.eu-dataset.endpoint</name>
  <value>s3.eu-west-1.amazonaws.com</value>
</property>
```

Declaring the region for the data is simpler, as it avoids having to look up the full URL and having to worry about historical quirks of regional endpoint hostnames.

```xml
<property>
  <name>fs.s3a.bucket.landsat-pds.endpoint.region</name>
  <value>us-west-2</value>
  <description>The region for s3a://landsat-pds URLs</description>
</property>

<property>
  <name>fs.s3a.bucket.eu-dataset.endpoint.region</name>
  <value>eu-west-1</value>
</property>
```

## <a name="privatelink"></a> AWS PrivateLink

[AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html) allows for a private connection to a bucket to be defined, with network access rules managing how a bucket can be accessed.

1. Follow the documentation to create the private link.
2. Retrieve the DNS name from the console, such as `vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com`.
3. Convert this to an endpoint URL by prefixing "https://bucket.".
4. Declare this as the bucket endpoint and switch to path-style access.
5. Declare the region: there is no automated determination of the region from
   the `vpce` URL.

```xml
<property>
  <name>fs.s3a.bucket.example-usw2.endpoint</name>
  <value>https://bucket.vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com/</value>
</property>

<property>
  <name>fs.s3a.bucket.example-usw2.path.style.access</name>
  <value>true</value>
</property>

<property>
  <name>fs.s3a.bucket.example-usw2.endpoint.region</name>
  <value>us-west-2</value>
</property>
```

## <a name="fips"></a> Federal Information Processing Standards (FIPS) Endpoints

It is possible to use [FIPS-compliant](https://www.nist.gov/itl/fips-general-information) endpoints which
support a restricted subset of TLS algorithms.

Amazon provide a specific set of [FIPS endpoints](https://aws.amazon.com/compliance/fips/)
to use so callers can be confident that the network communication is compliant with the standard:
non-compliant algorithms are unavailable.

The boolean option `fs.s3a.endpoint.fips` (default `false`) switches the S3A connector to using the FIPS endpoint of a region.

```xml
<property>
  <name>fs.s3a.endpoint.fips</name>
  <value>true</value>
  <description>Use the FIPS endpoint</description>
</property>
```

For a single bucket:
```xml
<property>
  <name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
  <value>true</value>
  <description>Use the FIPS endpoint for the landsat dataset</description>
</property>
```

If this option is `true`, the endpoint option `fs.s3a.endpoint` MUST NOT be set:

```
A custom endpoint cannot be combined with FIPS: https://s3.eu-west-2.amazonaws.com
```

The SDK calculates the FIPS-specific endpoint without any awareness as to whether FIPS is supported by a region. The first attempt to interact with the service will fail:

```
java.net.UnknownHostException: software.amazon.awssdk.core.exception.SdkClientException:
Received an UnknownHostException when attempting to interact with a service.
See cause for the exact endpoint that is failing to resolve.
If this is happening on an endpoint that previously worked,
there may be a network connectivity issue or your DNS cache
could be storing endpoints for too long.:
example-london-1.s3-fips.eu-west-2.amazonaws.com
```
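One way to check what a client has actually picked up is the `bucket-info` command, which with this change can be required to report a FIPS endpoint; it fails if the capability is absent (bucket name illustrative):

```bash
hadoop s3guard bucket-info -fips s3a://example-london-1/
```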

*Important* OpenSSL and FIPS endpoints

Linux distributions with a FIPS-compliant SSL library may not be compatible with wildfly.
Always use the JDK SSL implementation unless you are confident that the library
is compatible, or wish to experiment with the settings outside of production deployments.

```xml
<property>
  <name>fs.s3a.ssl.channel.mode</name>
  <value>Default_JSSE</value>
</property>
```
## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
|
||||
|
||||
S3A supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which
|
||||
improves VPC integration with S3 and simplifies your data's permission model because different
|
||||
policies can be applied now on the Access Point level. For more information about why to use and
|
||||
how to create them make sure to read the official documentation.
|
||||
|
||||
Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name.
|
||||
You can set the Access Point ARN property using the following per bucket configuration property:
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
|
||||
<value> {ACCESSPOINT_ARN_HERE} </value>
|
||||
<description>Configure S3a traffic to use this AccessPoint</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
This configures access to the `sample-bucket` bucket for S3A, to go through the
|
||||
new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your
|
||||
configured ARN when getting data from S3 instead of your bucket.
|
||||
|
||||
_the name of the bucket used in the s3a:// URLs is irrelevant; it is not used when connecting with the store_
|
||||
|
||||
Example
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.bucket.example-ap.accesspoint.arn</name>
|
||||
<value>arn:aws:s3:eu-west-2:152813717728:accesspoint/ap-example-london</value>
|
||||
<description>AccessPoint bound to bucket name example-ap</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access
|
||||
Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access
|
||||
to known sources of data defined through Access Points. In case there is a need to access a bucket
|
||||
directly (without Access Points) then you can use per bucket overrides to disable this setting on a
|
||||
bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
|
||||
|
||||
```xml
|
||||
<!-- Require access point only access -->
|
||||
<property>
|
||||
<name>fs.s3a.accesspoint.required</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<!-- Disable it on a per-bucket basis if needed -->
|
||||
<property>
|
||||
<name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
Before using Access Points make sure you're not impacted by the following:
|
||||
- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons;
|
||||
- The endpoint for S3 requests will automatically change to use
|
||||
`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While
|
||||
considering endpoints, if you have any custom signers that use the host endpoint property make
|
||||
sure to update them if needed;
|
||||
|
||||
## <a name="debugging"></a> Debugging network problems
|
||||
|
||||
The `storediag` command within the utility [cloudstore](https://github.com/exampleoughran/cloudstore)
|
||||
JAR is recommended as the way to view and print settings.
|
||||
|
||||
If `storediag` doesn't connect to your S3 store, *nothing else will*.
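A typical invocation, assuming a cloudstore release JAR on the local filesystem (the JAR name varies with the release, and the bucket is illustrative):

```bash
hadoop jar cloudstore-1.0.jar storediag s3a://example-bucket/
```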

## <a name="common-problems"></a> Common Sources of Connection Problems

Based on the experience of people who field support calls, here are
some of the main connectivity issues which cause problems.

### <a name="inconsistent-config"></a> Inconsistent configuration across a cluster

All hosts in the cluster need to have the configuration secrets;
local environment variables are not enough.

If HTTPS/TLS is used for a private store, the relevant certificates MUST be installed everywhere.

For applications such as distcp, the options need to be passed with the job.
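For example, endpoint and region settings can be passed to a distcp job through the generic `-D` options (paths, region and bucket are illustrative):

```bash
hadoop distcp \
  -D fs.s3a.endpoint.region=eu-west-1 \
  -D fs.s3a.path.style.access=true \
  hdfs://namenode:8020/data s3a://example-bucket/data
```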

### <a name="public-private-mixup"></a> Confusion between public/private S3 stores

If your cluster is configured to use a private store, AWS-hosted buckets are not visible.
If you wish to read data in a private store, you need to change the endpoint.

Private S3 stores generally expect path style access.

### <a name="region-misconfigure"></a> Region and endpoints misconfigured

These usually surface rapidly and with meaningful messages.

Region errors generally surface as
* `UnknownHostException`
* `AWSRedirectException` "Received permanent redirect response to region"

Endpoint configuration problems can be more varied, as they are just HTTPS URLs.

### <a name="wildfly"></a> Wildfly/OpenSSL Brittleness

When it works, it is fast. But it is fussy as to openSSL implementations, TLS protocols and more.
Because it uses the native openssl binaries, operating system updates can trigger regressions.

Disabling it should be the first step to troubleshooting any TLS problems.

### <a name="proxy-misconfiguration"></a> Proxy setup

If there is a proxy, set it up correctly.
@@ -33,6 +33,7 @@ full details.

## <a name="documents"></a> Documents

* [Connecting](./connecting.html)
* [Encryption](./encryption.html)
* [Performance](./performance.html)
* [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html)
@@ -223,6 +224,10 @@ Do not inadvertently share these credentials through means such as:

If you do any of these: change your credentials immediately!

## Connecting to Amazon S3 or a third-party store

See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md).

## <a name="authenticating"></a> Authenticating with S3

Except when interacting with public S3 buckets, the S3A client
@@ -835,61 +840,15 @@ Here are some of the S3A properties for use in production.
 </property>

-<property>
-  <name>fs.s3a.connection.ssl.enabled</name>
-  <value>true</value>
-  <description>Enables or disables SSL connections to AWS services.
-    Also sets the default port to use for the s3a proxy settings,
-    when not explicitly set in fs.s3a.proxy.port.</description>
-</property>
-
 <property>
-  <name>fs.s3a.endpoint</name>
-  <description>AWS S3 endpoint to connect to. An up-to-date list is
-    provided in the AWS Documentation: regions and endpoints. Without this
-    property, the standard region (s3.amazonaws.com) is assumed.
+  <name>fs.s3a.connection.maximum</name>
+  <value>96</value>
+  <description>Controls the maximum number of simultaneous connections to S3.
+    This must be bigger than the value of fs.s3a.threads.max so as to stop
+    threads being blocked waiting for new HTTPS connections.
+    Why not equal? The AWS SDK transfer manager also uses these connections.
   </description>
 </property>

-<property>
-  <name>fs.s3a.path.style.access</name>
-  <value>false</value>
-  <description>Enable S3 path style access ie disabling the default virtual hosting behaviour.
-    Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting.
-  </description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.host</name>
-  <description>Hostname of the (optional) proxy server for S3 connections.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.port</name>
-  <description>Proxy server port. If this property is not set
-    but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with
-    the value of fs.s3a.connection.ssl.enabled).</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.username</name>
-  <description>Username for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.password</name>
-  <description>Password for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.domain</name>
-  <description>Domain for authenticating with proxy server.</description>
-</property>
-
-<property>
-  <name>fs.s3a.proxy.workstation</name>
-  <description>Workstation for authenticating with proxy server.</description>
-</property>
-
 <property>
   <name>fs.s3a.attempts.maximum</name>
   <value>5</value>
@@ -1005,14 +964,6 @@ Here are some of the S3A properties for use in production.
     implementations can still be used</description>
 </property>

-<property>
-  <name>fs.s3a.accesspoint.required</name>
-  <value>false</value>
-  <description>Require that all S3 access is made through Access Points and not through
-    buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set
-    of buckets.</description>
-</property>
-
 <property>
   <name>fs.s3a.block.size</name>
   <value>32M</value>
@@ -1218,23 +1169,6 @@ Here are some of the S3A properties for use in production.
   </description>
 </property>

-<property>
-  <name>fs.s3a.connection.request.timeout</name>
-  <value>0</value>
-  <description>
-    Time out on HTTP requests to the AWS service; 0 means no timeout.
-    Measured in seconds; the usual time suffixes are all supported
-
-    Important: this is the maximum duration of any AWS service call,
-    including upload and copy operations. If non-zero, it must be larger
-    than the time to upload multi-megabyte blocks to S3 from the client,
-    and to rename many-GB files. Use with care.
-
-    Values that are larger than Integer.MAX_VALUE milliseconds are
-    converged to Integer.MAX_VALUE milliseconds
-  </description>
-</property>
-
 <property>
   <name>fs.s3a.etag.checksum.enabled</name>
   <value>false</value>
@@ -1699,179 +1633,6 @@ For a site configuration of:
 The bucket "nightly" will be encrypted with SSE-KMS using the KMS key
 `arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f`

-### <a name="per_bucket_endpoints"></a>Using Per-Bucket Configuration to access data round the world
-
-S3 Buckets are hosted in different "regions", the default being "US-East".
-The S3A client talks to this region by default, issuing HTTP requests
-to the server `s3.amazonaws.com`.
-
-S3A can work with buckets from any region. Each region has its own
-S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
-
-1. Applications running in EC2 infrastructure do not pay for IO to/from
-*local S3 buckets*. They will be billed for access to remote buckets. Always
-use local buckets and local copies of data, wherever possible.
-1. The default S3 endpoint can support data IO with any bucket when the V1 request
-signing protocol is used.
-1. When the V4 signing protocol is used, AWS requires the explicit region endpoint
-to be used —hence S3A must be configured to use the specific endpoint. This
-is done in the configuration option `fs.s3a.endpoint`.
-1. All endpoints other than the default endpoint only support interaction
-with buckets local to that S3 instance.
-
-While it is generally simpler to use the default endpoint, working with
-V4-signing-only regions (Frankfurt, Seoul) requires the endpoint to be identified.
-Expect better performance from direct connections —traceroute will give you some insight.
-
-If the wrong endpoint is used, the request may fail. This may be reported as a 301/redirect error,
-or as a 400 Bad Request: take these as cues to check the endpoint setting of
-a bucket.
-
-Here is a list of properties defining all AWS S3 regions, current as of June 2017:
-
-```xml
-<!--
- This is the default endpoint, which can be used to interact
- with any v2 region.
- -->
-<property>
-  <name>central.endpoint</name>
-  <value>s3.amazonaws.com</value>
-</property>
-
-<property>
-  <name>canada.endpoint</name>
-  <value>s3.ca-central-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>frankfurt.endpoint</name>
-  <value>s3.eu-central-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>ireland.endpoint</name>
-  <value>s3-eu-west-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>london.endpoint</name>
-  <value>s3.eu-west-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>mumbai.endpoint</name>
-  <value>s3.ap-south-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>ohio.endpoint</name>
-  <value>s3.us-east-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>oregon.endpoint</name>
-  <value>s3-us-west-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>sao-paolo.endpoint</name>
-  <value>s3-sa-east-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>seoul.endpoint</name>
-  <value>s3.ap-northeast-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>singapore.endpoint</name>
-  <value>s3-ap-southeast-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>sydney.endpoint</name>
-  <value>s3-ap-southeast-2.amazonaws.com</value>
-</property>
-
-<property>
-  <name>tokyo.endpoint</name>
-  <value>s3-ap-northeast-1.amazonaws.com</value>
-</property>
-
-<property>
-  <name>virginia.endpoint</name>
-  <value>${central.endpoint}</value>
-</property>
-```
-
-This list can be used to specify the endpoint of individual buckets, for example
-for buckets in the central and EU/Ireland endpoints.
-
-```xml
-<property>
-  <name>fs.s3a.bucket.landsat-pds.endpoint</name>
-  <value>${central.endpoint}</value>
-  <description>The endpoint for s3a://landsat-pds URLs</description>
-</property>
-
-<property>
-  <name>fs.s3a.bucket.eu-dataset.endpoint</name>
-  <value>${ireland.endpoint}</value>
-  <description>The endpoint for s3a://eu-dataset URLs</description>
-</property>
-```
-
-Why explicitly declare a bucket bound to the central endpoint? It ensures
-that if the default endpoint is changed to a new region, data store in
-US-east is still reachable.
-
-## <a name="accesspoints"></a>Configuring S3 AccessPoints usage with S3A
-S3a now supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which
-improves VPC integration with S3 and simplifies your data's permission model because different
-policies can be applied now on the Access Point level. For more information about why to use and
-how to create them make sure to read the official documentation.
-
-Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name.
-You can set the Access Point ARN property using the following per bucket configuration property:
-```xml
-<property>
-  <name>fs.s3a.bucket.sample-bucket.accesspoint.arn</name>
-  <value> {ACCESSPOINT_ARN_HERE} </value>
-  <description>Configure S3a traffic to use this AccessPoint</description>
-</property>
-```
-
-This configures access to the `sample-bucket` bucket for S3A, to go through the
-new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your
-configured ARN when getting data from S3 instead of your bucket.
-
-The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access
-Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access
-to known sources of data defined through Access Points. In case there is a need to access a bucket
-directly (without Access Points) then you can use per bucket overrides to disable this setting on a
-bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`.
-
-```xml
-<!-- Require access point only access -->
-<property>
-  <name>fs.s3a.accesspoint.required</name>
-  <value>true</value>
-</property>
-<!-- Disable it on a per-bucket basis if needed -->
-<property>
-  <name>fs.s3a.bucket.example-bucket.accesspoint.required</name>
-  <value>false</value>
-</property>
-```
-
-Before using Access Points make sure you're not impacted by the following:
-- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons;
-- The endpoint for S3 requests will automatically change from `s3.amazonaws.com` to use
-`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While
-considering endpoints, if you have any custom signers that use the host endpoint property make
-sure to update them if needed;
-
 ## <a name="requester_pays"></a>Requester Pays buckets

 S3A supports buckets with
@@ -218,6 +218,7 @@ everything uses the same HTTP connection pool.
| `fs.s3a.executor.capacity` | `16` | Maximum threads for any single operation |
| `fs.s3a.max.total.tasks` | `16` | Extra tasks which can be queued excluding prefetching operations |

### <a name="timeouts"></a> Timeouts.

Network timeout options can be tuned to make the client fail faster *or* retry more.
The choice is yours. Generally recovery is better, but sometimes fail-fast is more useful.
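For example, an illustrative fail-faster tuning (not a recommendation; consult the option reference for defaults and units, which are milliseconds here):

```xml
<property>
  <name>fs.s3a.attempts.maximum</name>
  <value>2</value>
</property>

<property>
  <name>fs.s3a.connection.establish.timeout</name>
  <value>5000</value>
</property>
```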
@@ -132,17 +132,14 @@ This auditing information can be used to identify opportunities to reduce load.
 Prints and optionally checks the status of a bucket.

 ```bash
-hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET
+hadoop s3guard bucket-info [-fips] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET
 ```

 Options

 | argument | meaning |
-|-----------|-------------|
-| `-guarded` | Require S3Guard to be enabled. This will now always fail |
-| `-unguarded` | Require S3Guard to be disabled. This will now always succeed |
-| `-auth` | Require the S3Guard mode to be "authoritative". This will now always fail |
-| `-nonauth` | Require the S3Guard mode to be "non-authoritative". This will now always fail |
+|----------------------|---------------------------------------------------------------------|
+| `-fips` | Require the FIPS endpoint to be in use |
+| `-magic` | Require the S3 filesystem to support the "magic" committer |
+| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` |
+| `-encryption <type>` | Require a specific encryption algorithm |
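For example, requiring both a FIPS endpoint and magic committer support, and printing the directory marker policy (bucket name illustrative):

```bash
hadoop s3guard bucket-info -fips -magic -markers aware s3a://example-bucket/
```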
@@ -39,6 +39,7 @@
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED;
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
@@ -138,6 +139,7 @@ private Configuration createConfigurationWithProbe(final int probe) {
    removeBaseAndBucketOverrides(conf,
        S3A_BUCKET_PROBE,
        ENDPOINT,
        FIPS_ENDPOINT,
        AWS_REGION,
        PATH_STYLE_ACCESS);
    conf.setInt(S3A_BUCKET_PROBE, probe);
@@ -20,7 +20,6 @@

 import java.io.IOException;
 import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.UnknownHostException;
 import java.nio.file.AccessDeniedException;
 import java.util.ArrayList;
@@ -36,16 +35,17 @@
 import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketResponse;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;

 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
+import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
 import static org.apache.hadoop.io.IOUtils.closeStream;
-import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;

 import static org.apache.hadoop.test.LambdaTestUtils.intercept;

 /**
@@ -82,6 +82,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {

   private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";

+  public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor";
+
   /**
    * New FS instance which will be closed in teardown.
    */
@@ -134,10 +136,9 @@ public void testEndpointOverride() throws Throwable {
     describe("Create a client with a configured endpoint");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2);
+    S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

   @Test
@@ -145,10 +146,9 @@ public void testCentralEndpoint() throws Throwable {
     describe("Create a client with the central endpoint");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1);
+    S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

   @Test
@@ -156,21 +156,40 @@ public void testWithRegionConfig() throws Throwable {
     describe("Create a client with a configured region");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2);
+    S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

+  @Test
+  public void testWithFips() throws Throwable {
+    describe("Create a client with fips enabled");
+
+    S3Client client = createS3Client(getConfiguration(),
+        null, EU_WEST_2, EU_WEST_2, true);
+    expectInterceptorException(client);
+  }
+
+  /**
+   * Attempting to create a client with fips enabled and an endpoint specified
+   * fails during client construction.
+   */
+  @Test
+  public void testWithFipsAndEndpoint() throws Throwable {
+    describe("Create a client with fips and an endpoint");
+
+    intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () ->
+        createS3Client(getConfiguration(), CENTRAL_ENDPOINT, null, US_EAST_1, true));
+  }
+
   @Test
   public void testEUWest2Endpoint() throws Throwable {
     describe("Create a client with the eu west 2 endpoint");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2);
+    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

   @Test
@@ -178,10 +197,9 @@ public void testWithRegionAndEndpointConfig() throws Throwable {
     describe("Test that when both region and endpoint are configured, region takes precedence");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2);
+    S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

   @Test
@@ -189,21 +207,43 @@ public void testWithChinaEndpoint() throws Throwable {
     describe("Test with a china endpoint");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1);
+    S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

+  /**
+   * Expect an exception to be thrown by the interceptor with the message
+   * {@link #EXCEPTION_THROWN_BY_INTERCEPTOR}.
+   * @param client client to issue a head request against.
+   * @return the expected exception.
+   * @throws Exception any other exception.
+   */
+  private AwsServiceException expectInterceptorException(final S3Client client)
+      throws Exception {
+    return intercept(AwsServiceException.class, EXCEPTION_THROWN_BY_INTERCEPTOR,
+        () -> head(client));
+  }
+
+  /**
+   * Issue a head request against the bucket.
+   * @param client client to use
+   * @return the response.
+   */
+  private HeadBucketResponse head(final S3Client client) {
+    return client.headBucket(
+        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build());
+  }
+
   @Test
   public void testWithGovCloudEndpoint() throws Throwable {
-    describe("Test with a gov cloud endpoint");
+    describe("Test with a gov cloud endpoint; enable fips");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1);
+    S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

   @Test
@@ -212,19 +252,20 @@ public void testWithVPCE() throws Throwable {
     describe("Test with vpc endpoint");
     Configuration conf = getConfiguration();

-    S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2);
+    S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2, false);

-    intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket(
-        HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()));
+    expectInterceptorException(client);
   }

-  class RegionInterceptor implements ExecutionInterceptor {
-    private String endpoint;
-    private String region;
+  private final class RegionInterceptor implements ExecutionInterceptor {
+    private final String endpoint;
+    private final String region;
+    private final boolean isFips;

-    RegionInterceptor(String endpoint, String region) {
+    RegionInterceptor(String endpoint, String region, final boolean isFips) {
       this.endpoint = endpoint;
       this.region = region;
+      this.isFips = isFips;
     }

     @Override
@@ -249,8 +290,15 @@ public void beforeExecution(Context.BeforeExecution context,
           executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString())
           .describedAs("Incorrect region set").isEqualTo(region);

+      // verify the fips state matches expectation.
+      Assertions.assertThat(executionAttributes.getAttribute(
+              AwsExecutionAttribute.FIPS_ENDPOINT_ENABLED))
+          .describedAs("Incorrect FIPS flag set in execution attributes")
+          .isNotNull()
+          .isEqualTo(isFips);
+
       // We don't actually want to make a request, so exit early.
-      throw AwsServiceException.builder().message("Exception thrown by interceptor").build();
+      throw AwsServiceException.builder().message(EXCEPTION_THROWN_BY_INTERCEPTOR).build();
     }
   }

@@ -261,17 +309,17 @@ public void beforeExecution(Context.BeforeExecution context,
    * @param conf configuration to use.
    * @param endpoint endpoint.
    * @param expectedRegion the region that should be set in the client.
+   * @param isFips is this a FIPS endpoint?
    * @return the client.
-   * @throws URISyntaxException parse problems.
    * @throws IOException IO problems
    */
   @SuppressWarnings("deprecation")
   private S3Client createS3Client(Configuration conf,
-      String endpoint, String configuredRegion, String expectedRegion)
+      String endpoint, String configuredRegion, String expectedRegion, boolean isFips)
       throws IOException {

     List<ExecutionInterceptor> interceptors = new ArrayList<>();
-    interceptors.add(new RegionInterceptor(endpoint, expectedRegion));
+    interceptors.add(new RegionInterceptor(endpoint, expectedRegion, isFips));

     DefaultS3ClientFactory factory
         = new DefaultS3ClientFactory();
@@ -283,8 +331,8 @@ private S3Client createS3Client(Configuration conf,
         .withMetrics(new EmptyS3AStatisticsContext()
             .newStatisticsFromAwsSdk())
         .withExecutionInterceptors(interceptors)
-        .withRegion(configuredRegion);
+        .withRegion(configuredRegion)
+        .withFipsEnabled(isFips);

     S3Client client = factory.createS3Client(
         getFileSystem().getUri(),
@@ -33,6 +33,8 @@
import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.StringUtils;

import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM;
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt;
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads;
@@ -97,6 +99,20 @@ public void testStoreInfo() throws Throwable {
    LOG.info("Exec output=\n{}", output);
  }

  @Test
  public void testStoreInfoFips() throws Throwable {
    final S3AFileSystem fs = getFileSystem();
    if (!fs.hasPathCapability(new Path("/"), FIPS_ENDPOINT)) {
      skip("FIPS not enabled");
    }
    S3GuardTool.BucketInfo cmd =
        toClose(new S3GuardTool.BucketInfo(fs.getConf()));
    String output = exec(cmd, cmd.getName(),
        "-" + BucketInfo.FIPS_FLAG,
        fs.getUri().toString());
    LOG.info("Exec output=\n{}", output);
  }

  private final static String UPLOAD_NAME = "test-upload";

  @Test
@@ -56,6 +56,12 @@
  <description>Do not add the referrer header to landsat operations</description>
</property>

<property>
  <name>fs.s3a.bucket.landsat-pds.endpoint.fips</name>
  <value>true</value>
  <description>Use the fips endpoint</description>
</property>

<!-- Per-bucket configurations: usgs-landsat -->
<property>
  <name>fs.s3a.bucket.usgs-landsat.endpoint.region</name>