diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index d14a82e5c3..c4b8f6e3c4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -160,14 +160,33 @@ private Constants() { DEFAULT_SSL_CHANNEL_MODE = DelegatingSSLSocketFactory.SSLChannelMode.Default_JSSE; - //use a custom endpoint? + /** + * Endpoint. For v4 signing and/or better performance, + * this should be the specific endpoint of the region + * in which the bucket is hosted. + */ public static final String ENDPOINT = "fs.s3a.endpoint"; /** - * Default value of s3 endpoint. If not set explicitly using - * {@code AmazonS3#setEndpoint()}, this is used. + * Default value of s3 endpoint: {@value}. + * It tells the AWS client to work it out by asking the central + * endpoint where the bucket lives; caching that + * value in the client for the life of the process. + *
+ * Note: previously this constant was defined as + * {@link #CENTRAL_ENDPOINT}, however the actual + * S3A client code used "" as the default when + * {@link #ENDPOINT} was unset. + * As core-default.xml also set the endpoint to "", + * the empty string has long been the real + * default value. */ - public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com"; + public static final String DEFAULT_ENDPOINT = ""; + + /** + * The central endpoint :{@value}. + */ + public static final String CENTRAL_ENDPOINT = "s3.amazonaws.com"; //Enable path style access? Overrides default virtual hosting public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 96d16e8b1b..ae50bd1459 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -22,9 +22,8 @@ import java.net.URI; import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.metrics.RequestMetricCollector; +import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.AmazonS3ClientBuilder; @@ -41,18 +40,15 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import 
static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; -import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; /** * The default {@link S3ClientFactory} implementation. * This calls the AWS SDK to configure and create an - * {@link AmazonS3Client} that communicates with the S3 service. + * {@code AmazonS3Client} that communicates with the S3 service. */ @InterfaceAudience.Private @InterfaceStability.Unstable @@ -60,8 +56,6 @@ public class DefaultS3ClientFactory extends Configured implements S3ClientFactory { private static final String S3_SERVICE_NAME = "s3"; - private static final String S3_SIGNER = "S3SignerType"; - private static final String S3_V4_SIGNER = "AWSS3V4SignerType"; /** * Subclasses refer to this. @@ -70,22 +64,21 @@ public class DefaultS3ClientFactory extends Configured LoggerFactory.getLogger(DefaultS3ClientFactory.class); /** - * Create the client. - *
- * If the AWS stats are not null then a {@link AwsStatisticsCollector}. - * is created to bind to the two. - * Important: until this binding works properly across regions, - * this should be null. + * Create the client by preparing the AwsConf configuration + * and then invoking {@code buildAmazonS3Client()}. */ @Override - public AmazonS3 createS3Client(URI name, - final String bucket, - final AWSCredentialsProvider credentials, - final String userAgentSuffix, - final StatisticsFromAwsSdk statisticsFromAwsSdk) throws IOException { + public AmazonS3 createS3Client( + final URI uri, + final S3ClientCreationParameters parameters) throws IOException { Configuration conf = getConf(); final ClientConfiguration awsConf = S3AUtils - .createAwsConf(conf, bucket, Constants.AWS_SERVICE_IDENTIFIER_S3); + .createAwsConf(conf, + uri.getHost(), + Constants.AWS_SERVICE_IDENTIFIER_S3); + // add any headers + parameters.getHeaders().forEach((h, v) -> + awsConf.addHeader(h, v)); // When EXPERIMENTAL_AWS_INTERNAL_THROTTLING is false // throttling is explicitly disabled on the S3 client so that @@ -96,111 +89,62 @@ public AmazonS3 createS3Client(URI name, conf.getBoolean(EXPERIMENTAL_AWS_INTERNAL_THROTTLING, EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT)); - if (!StringUtils.isEmpty(userAgentSuffix)) { - awsConf.setUserAgentSuffix(userAgentSuffix); + if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) { + awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix()); } - // optional metrics - RequestMetricCollector metrics = statisticsFromAwsSdk != null - ? new AwsStatisticsCollector(statisticsFromAwsSdk) - : null; - return newAmazonS3Client( - credentials, + return buildAmazonS3Client( awsConf, - metrics, - conf.getTrimmed(ENDPOINT, ""), - conf.getBoolean(PATH_STYLE_ACCESS, false)); + parameters); } /** - * Create an {@link AmazonS3} client. 
- * Override this to provide an extended version of the client - * @param credentials credentials to use - * @param awsConf AWS configuration - * @param metrics metrics collector or null - * @param endpoint endpoint string; may be "" - * @param pathStyleAccess enable path style access? - * @return new AmazonS3 client - */ - protected AmazonS3 newAmazonS3Client( - final AWSCredentialsProvider credentials, - final ClientConfiguration awsConf, - final RequestMetricCollector metrics, - final String endpoint, - final boolean pathStyleAccess) { - if (metrics != null) { - LOG.debug("Building S3 client using the SDK builder API"); - return buildAmazonS3Client(credentials, awsConf, metrics, endpoint, - pathStyleAccess); - } else { - LOG.debug("Building S3 client using the SDK builder API"); - return classicAmazonS3Client(credentials, awsConf, endpoint, - pathStyleAccess); - } - } - - /** - * Use the (newer) Builder SDK to create a an AWS S3 client. + * Use the Builder API to create an AWS S3 client. *
- * This has a more complex endpoint configuration in a - * way which does not yet work in this code in a way - * which doesn't trigger regressions. So it is only used - * when SDK metrics are supplied. - * @param credentials credentials to use + * This has a more complex endpoint configuration mechanism + * which initially caused problems; the + * {@code withForceGlobalBucketAccessEnabled(true)} + * command is critical here. * @param awsConf AWS configuration - * @param metrics metrics collector or null - * @param endpoint endpoint string; may be "" - * @param pathStyleAccess enable path style access? + * @param parameters parameters * @return new AmazonS3 client */ - private AmazonS3 buildAmazonS3Client( - final AWSCredentialsProvider credentials, + protected AmazonS3 buildAmazonS3Client( final ClientConfiguration awsConf, - final RequestMetricCollector metrics, - final String endpoint, - final boolean pathStyleAccess) { + final S3ClientCreationParameters parameters) { AmazonS3ClientBuilder b = AmazonS3Client.builder(); - b.withCredentials(credentials); + b.withCredentials(parameters.getCredentialSet()); b.withClientConfiguration(awsConf); - b.withPathStyleAccessEnabled(pathStyleAccess); - if (metrics != null) { - b.withMetricsCollector(metrics); + b.withPathStyleAccessEnabled(parameters.isPathStyleAccess()); + + if (parameters.getMetrics() != null) { + b.withMetricsCollector( + new AwsStatisticsCollector(parameters.getMetrics())); + } + if (parameters.getRequestHandlers() != null) { + b.withRequestHandlers( + parameters.getRequestHandlers().toArray(new RequestHandler2[0])); + } + if (parameters.getMonitoringListener() != null) { + b.withMonitoringListener(parameters.getMonitoringListener()); } // endpoint set up is a PITA - // client.setEndpoint("") is no longer available AwsClientBuilder.EndpointConfiguration epr - = createEndpointConfiguration(endpoint, awsConf); + = createEndpointConfiguration(parameters.getEndpoint(), + awsConf); if (epr != null) { // an 
endpoint binding was constructed: use it. b.withEndpointConfiguration(epr); + } else { + // no idea what the endpoint is, so tell the SDK + // to work it out at the cost of an extra HEAD request + b.withForceGlobalBucketAccessEnabled(true); } final AmazonS3 client = b.build(); return client; } - /** - * Wrapper around constructor for {@link AmazonS3} client. - * Override this to provide an extended version of the client. - *
- * This uses a deprecated constructor -it is currently - * the only one which works for us. - * @param credentials credentials to use - * @param awsConf AWS configuration - * @param endpoint endpoint string; may be "" - * @param pathStyleAccess enable path style access? - * @return new AmazonS3 client - */ - @SuppressWarnings("deprecation") - private AmazonS3 classicAmazonS3Client( - AWSCredentialsProvider credentials, - ClientConfiguration awsConf, - final String endpoint, - final boolean pathStyleAccess) { - final AmazonS3 client = new AmazonS3Client(credentials, awsConf); - return configureAmazonS3Client(client, endpoint, pathStyleAccess); - } - /** * Configure classic S3 client. *
@@ -226,31 +170,6 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, throw new IllegalArgumentException(msg, e); } } - return applyS3ClientOptions(s3, pathStyleAccess); - } - - /** - * Perform any tuning of the {@code S3ClientOptions} settings based on - * the Hadoop configuration. - * This is different from the general AWS configuration creation as - * it is unique to S3 connections. - *
- * The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access - * to S3 buckets if configured. By default, the - * behavior is to use virtual hosted-style access with URIs of the form - * {@code http://bucketname.s3.amazonaws.com} - *
- * Enabling path-style access and a
- * region-specific endpoint switches the behavior to use URIs of the form
- * {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
- * It is common to use this when connecting to private S3 servers, as it
- * avoids the need to play with DNS entries.
- * @param s3 S3 client
- * @param pathStyleAccess enable path style access?
- * @return the S3 client
- */
- protected static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
- final boolean pathStyleAccess) {
if (pathStyleAccess) {
LOG.debug("Enabling path style access!");
s3.setS3ClientOptions(S3ClientOptions.builder()
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
index ddc492235d..c11581f1d5 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java
@@ -19,8 +19,6 @@
package org.apache.hadoop.fs.s3a;
import com.amazonaws.ClientConfiguration;
-import com.amazonaws.auth.AWSCredentialsProvider;
-import com.amazonaws.metrics.RequestMetricCollector;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -31,31 +29,25 @@
* This client is for testing only; it is in the production
* {@code hadoop-aws} module to enable integration tests to use this
* just by editing the Hadoop configuration used to bring up the client.
+ * <p>
+ * The factory uses the older constructor-based instantiation/configuration
+ * of the client, so does not wire up metrics, handlers etc.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
- /**
- * Create the inconsistent client.
- * Logs a warning that this is being done.
- * @param credentials credentials to use
- * @param awsConf AWS configuration
- * @param metrics metric collector
- * @param endpoint AWS endpoint
- * @param pathStyleAccess should path style access be supported?
- * @return an inconsistent client.
- */
@Override
- protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
- ClientConfiguration awsConf,
- final RequestMetricCollector metrics,
- final String endpoint,
- final boolean pathStyleAccess) {
+ protected AmazonS3 buildAmazonS3Client(
+ final ClientConfiguration awsConf,
+ final S3ClientCreationParameters parameters) {
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
InconsistentAmazonS3Client s3
- = new InconsistentAmazonS3Client(credentials, awsConf, getConf());
- configureAmazonS3Client(s3, endpoint, pathStyleAccess);
+ = new InconsistentAmazonS3Client(
+ parameters.getCredentialSet(), awsConf, getConf());
+ configureAmazonS3Client(s3,
+ parameters.getEndpoint(),
+ parameters.isPathStyleAccess());
return s3;
}
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index f625346957..8db5d51def 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -69,7 +69,6 @@
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
-
import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
import com.amazonaws.services.s3.model.SSECustomerKey;
import com.amazonaws.services.s3.model.UploadPartRequest;
@@ -83,7 +82,6 @@
import com.amazonaws.event.ProgressListener;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
-import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -166,7 +164,6 @@
import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics;
import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics;
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
-import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
import org.apache.hadoop.fs.s3a.statistics.impl.BondedS3AStatisticsContext;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.io.retry.RetryPolicies;
@@ -198,7 +195,6 @@
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
-import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_SDK_METRICS_ENABLED;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
@@ -376,6 +372,11 @@ public void initialize(URI name, Configuration originalConf)
LOG.debug("Initializing S3AFileSystem for {}", bucket);
// clone the configuration into one with propagated bucket options
Configuration conf = propagateBucketOptions(originalConf, bucket);
+ // fix up the classloader of the configuration to be whatever
+ // classloader loaded this filesystem.
+ // See: HADOOP-17372
+ conf.setClassLoader(this.getClass().getClassLoader());
+
// patch the Hadoop security providers
patchSecurityCredentialProviders(conf);
// look for delegation token support early.
@@ -740,16 +741,17 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL,
S3ClientFactory.class);
- StatisticsFromAwsSdk awsStats = null;
- // TODO: HADOOP-16830 when the S3 client building code works
- // with different regions,
- // then non-null stats can be passed in here.
- if (AWS_SDK_METRICS_ENABLED) {
- awsStats = statisticsContext.newStatisticsFromAwsSdk();
- }
+ S3ClientFactory.S3ClientCreationParameters parameters = null;
+ parameters = new S3ClientFactory.S3ClientCreationParameters()
+ .withCredentialSet(credentials)
+ .withEndpoint(conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT))
+ .withMetrics(statisticsContext.newStatisticsFromAwsSdk())
+ .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false))
+ .withUserAgentSuffix(uaSuffix);
s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf)
- .createS3Client(getUri(), bucket, credentials, uaSuffix, awsStats);
+ .createS3Client(getUri(),
+ parameters);
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
index e04d3b5cbd..dbb39fb662 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java
@@ -18,38 +18,246 @@
package org.apache.hadoop.fs.s3a;
+import javax.annotation.Nullable;
import java.io.IOException;
import java.net.URI;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.handlers.RequestHandler2;
+import com.amazonaws.monitoring.MonitoringListener;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
+
/**
* Factory for creation of {@link AmazonS3} client instances.
+ * <p>
+ * Important: HBase's HBoss module implements this interface in its tests.
+ * Take care when updating this interface to ensure that a client
+ * implementing only the deprecated method will work.
+ * See <a href="https://github.com/apache/hbase-filesystem">
+ * hbase-filesystem</a>.
*/
-@InterfaceAudience.Private
-@InterfaceStability.Unstable
+@InterfaceAudience.LimitedPrivate("HBoss")
+@InterfaceStability.Evolving
public interface S3ClientFactory {
/**
* Creates a new {@link AmazonS3} client.
*
- * @param name raw input S3A file system URI
- * @param bucket Optional bucket to use to look up per-bucket proxy secrets
- * @param credentialSet credentials to use
- * @param userAgentSuffix optional suffix for the UA field.
- * @param statisticsFromAwsSdk binding for AWS stats - may be null
+ * @param uri S3A file system URI
+ * @param parameters parameter object
* @return S3 client
* @throws IOException IO problem
*/
- AmazonS3 createS3Client(URI name,
- String bucket,
- AWSCredentialsProvider credentialSet,
- String userAgentSuffix,
- StatisticsFromAwsSdk statisticsFromAwsSdk) throws IOException;
+ AmazonS3 createS3Client(URI uri,
+ S3ClientCreationParameters parameters) throws IOException;
+ /**
+ * Settings for the S3 Client.
+ * Implemented as a class to pass in so that adding
+ * new parameters does not break the binding of
+ * external implementations of the factory.
+ */
+ final class S3ClientCreationParameters {
+
+ /**
+ * Credentials.
+ */
+ private AWSCredentialsProvider credentialSet;
+
+ /**
+ * Endpoint.
+ */
+ private String endpoint = DEFAULT_ENDPOINT;
+
+ /**
+ * Custom Headers.
+ */
+ private final Map