From d8fa1cfa6722cbf7a4ec3d6b9c44b034da9aa351 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Mon, 24 Oct 2016 21:22:34 -0700 Subject: [PATCH] HADOOP-13727. S3A: Reduce high number of connections to EC2 Instance Metadata Service caused by InstanceProfileCredentialsProvider. Contributed by Chris Nauroth. --- .../src/main/resources/core-default.xml | 122 ++++---- .../fs/s3a/AWSCredentialProviderList.java | 11 + .../org/apache/hadoop/fs/s3a/S3AUtils.java | 134 +++++++-- ...redInstanceProfileCredentialsProvider.java | 67 +++++ .../site/markdown/tools/hadoop-aws/index.md | 52 +++- .../s3a/ITestS3AAWSCredentialsProvider.java | 113 +------- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 42 ++- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 273 ++++++++++++++++++ 8 files changed, 621 insertions(+), 193 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceProfileCredentialsProvider.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 59d939bd83..dbbb3e1cc6 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -41,10 +41,10 @@ hadoop.http.filter.initializers org.apache.hadoop.http.lib.StaticUserWebFilter - A comma separated list of class names. Each class in the list - must extend org.apache.hadoop.http.FilterInitializer. The corresponding - Filter will be initialized. Then, the Filter will be applied to all user - facing jsp and servlet web pages. The ordering of the list defines the + A comma separated list of class names. Each class in the list + must extend org.apache.hadoop.http.FilterInitializer. The corresponding + Filter will be initialized. 
Then, the Filter will be applied to all user + facing jsp and servlet web pages. The ordering of the list defines the ordering of the filters. @@ -76,14 +76,14 @@ hadoop.security.group.mapping org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback - Class for user to group mapping (get groups for a given user) for ACL. + Class for user to group mapping (get groups for a given user) for ACL. The default implementation, - org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback, - will determine if the Java Native Interface (JNI) is available. If JNI is - available the implementation will use the API within hadoop to resolve a - list of groups for a user. If JNI is not available then the shell - implementation, ShellBasedUnixGroupsMapping, is used. This implementation - shells out to the Linux/Unix environment with the + org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback, + will determine if the Java Native Interface (JNI) is available. If JNI is + available the implementation will use the API within hadoop to resolve a + list of groups for a user. If JNI is not available then the shell + implementation, ShellBasedUnixGroupsMapping, is used. This implementation + shells out to the Linux/Unix environment with the bash -c groups command to resolve a list of groups for a user. @@ -481,10 +481,10 @@ hadoop.rpc.protection authentication - A comma-separated list of protection values for secured sasl + A comma-separated list of protection values for secured sasl connections. Possible values are authentication, integrity and privacy. - authentication means authentication only and no integrity or privacy; - integrity implies authentication and integrity are enabled; and privacy + authentication means authentication only and no integrity or privacy; + integrity implies authentication and integrity are enabled; and privacy implies all of authentication, integrity and privacy are enabled. 
hadoop.security.saslproperties.resolver.class can be used to override the hadoop.rpc.protection for a connection at the server side. @@ -494,10 +494,10 @@ hadoop.security.saslproperties.resolver.class - SaslPropertiesResolver used to resolve the QOP used for a - connection. If not specified, the full set of values specified in - hadoop.rpc.protection is used while determining the QOP used for the - connection. If a class is specified, then the QOP values returned by + SaslPropertiesResolver used to resolve the QOP used for a + connection. If not specified, the full set of values specified in + hadoop.rpc.protection is used while determining the QOP used for the + connection. If a class is specified, then the QOP values returned by the class will be used while determining the QOP used for the connection. @@ -566,7 +566,7 @@ page size (4096 on Intel x86), and it determines how much data is buffered during read and write operations. - + io.bytes.per.checksum 512 @@ -599,7 +599,7 @@ either by by name or the full pathname. In the former case, the library is located by the dynamic linker, usually searching the directories specified in the environment variable LD_LIBRARY_PATH. - + The value of "system-native" indicates that the default system library should be used. To indicate that the algorithm should operate entirely in Java, specify "java-builtin". @@ -709,8 +709,8 @@ Number of minutes between trash checkpoints. Should be smaller or equal to fs.trash.interval. If zero, the value is set to the value of fs.trash.interval. - Every time the checkpointer runs it creates a new checkpoint - out of current and removes checkpoints created more than + Every time the checkpointer runs it creates a new checkpoint + out of current and removes checkpoints created more than fs.trash.interval minutes ago. @@ -735,7 +735,7 @@ fs.AbstractFileSystem.har.impl org.apache.hadoop.fs.HarFs The AbstractFileSystem for har: uris. 
- + fs.AbstractFileSystem.hdfs.impl @@ -806,7 +806,7 @@ fs.s3n.maxRetries 4 - The maximum number of retries for reading or writing files to S3, + The maximum number of retries for reading or writing files to S3, before we signal failure to the application. @@ -895,15 +895,37 @@ com.amazonaws.auth.AWSCredentialsProvider. These are loaded and queried in sequence for a valid set of credentials. - Each listed class must provide either an accessible constructor accepting - java.net.URI and org.apache.hadoop.conf.Configuration, or an accessible - default constructor. + Each listed class must implement one of the following means of + construction, which are attempted in order: + 1. a public constructor accepting java.net.URI and + org.apache.hadoop.conf.Configuration, + 2. a public static method named getInstance that accepts no + arguments and returns an instance of + com.amazonaws.auth.AWSCredentialsProvider, or + 3. a public default constructor. Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows anonymous access to a publicly accessible S3 bucket without any credentials. Please note that allowing anonymous access to an S3 bucket compromises security and therefore is unsuitable for most use cases. It can be useful for accessing public data sets without requiring AWS credentials. + + If unspecified, then the default list of credential provider classes, + queried in sequence, is: + 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports static + configuration of AWS access key ID and secret access key. See also + fs.s3a.access.key and fs.s3a.secret.key. + 2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports + configuration of AWS access key ID and secret access key in + environment variables named AWS_ACCESS_KEY_ID and + AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK. + 3. 
org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider: + a shared instance of + com.amazonaws.auth.InstanceProfileCredentialsProvider from the AWS + SDK, which supports use of instance profile credentials if running + in an EC2 VM. Using this shared instance potentially reduces load + on the EC2 instance metadata service for multi-threaded + applications. @@ -1007,7 +1029,7 @@ fs.s3a.paging.maximum 5000 - How many keys to request from S3 when doing + How many keys to request from S3 when doing directory listings at a time. @@ -1106,7 +1128,7 @@ fs.s3a.buffer.dir ${hadoop.tmp.dir}/s3a - Comma separated list of directories that will be used to buffer file + Comma separated list of directories that will be used to buffer file uploads to. @@ -1197,7 +1219,7 @@ io.seqfile.compress.blocksize 1000000 - The minimum block size for compression in block compressed + The minimum block size for compression in block compressed SequenceFiles. @@ -1213,7 +1235,7 @@ io.seqfile.sorter.recordlimit 1000000 - The limit on number of records to be kept in memory in a spill + The limit on number of records to be kept in memory in a spill in SequenceFiles.Sorter @@ -1291,7 +1313,7 @@ ipc.client.connect.timeout 20000 - Indicates the number of milliseconds a client will wait for the + Indicates the number of milliseconds a client will wait for the socket to establish a server connection. @@ -1388,10 +1410,10 @@ hadoop.security.impersonation.provider.class - A class which implements ImpersonationProvider interface, used to - authorize whether one user can impersonate a specific user. - If not specified, the DefaultImpersonationProvider will be used. - If a class is specified, then that class will be used to determine + A class which implements ImpersonationProvider interface, used to + authorize whether one user can impersonate a specific user. + If not specified, the DefaultImpersonationProvider will be used. 
+ If a class is specified, then that class will be used to determine the impersonation capability. @@ -1453,7 +1475,7 @@ net.topology.script.number.args 100 - The max number of args that the script configured with + The max number of args that the script configured with net.topology.script.file.name should be run with. Each arg is an IP address. @@ -1467,7 +1489,7 @@ org.apache.hadoop.net.TableMapping. The file format is a two column text file, with columns separated by whitespace. The first column is a DNS or IP address and the second column specifies the rack where the address maps. - If no entry corresponding to a host in the cluster is found, then + If no entry corresponding to a host in the cluster is found, then /default-rack is assumed. @@ -1983,14 +2005,14 @@ nfs.exports.allowed.hosts * rw - By default, the export can be mounted by any client. The value string - contains machine name and access privilege, separated by whitespace - characters. The machine name format can be a single host, a Java regular - expression, or an IPv4 address. The access privilege uses rw or ro to - specify read/write or read-only access of the machines to exports. If the + By default, the export can be mounted by any client. The value string + contains machine name and access privilege, separated by whitespace + characters. The machine name format can be a single host, a Java regular + expression, or an IPv4 address. The access privilege uses rw or ro to + specify read/write or read-only access of the machines to exports. If the access privilege is not provided, the default is read-only. Entries are separated by ";". For example: "192.168.0.0/22 rw ; host.*\.example\.com ; host1.test.org ro;". - Only the NFS gateway needs to restart after this property is updated. + Only the NFS gateway needs to restart after this property is updated. 
@@ -2044,7 +2066,7 @@ hadoop.security.crypto.codec.classes.aes.ctr.nopadding org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec - Comma-separated list of crypto codec implementations for AES/CTR/NoPadding. + Comma-separated list of crypto codec implementations for AES/CTR/NoPadding. The first implementation will be used if available, others are fallbacks. @@ -2061,7 +2083,7 @@ hadoop.security.crypto.jce.provider - The JCE provider name used in CryptoCodec. + The JCE provider name used in CryptoCodec. @@ -2069,7 +2091,7 @@ hadoop.security.crypto.buffer.size 8192 - The buffer size used by CryptoInputStream and CryptoOutputStream. + The buffer size used by CryptoInputStream and CryptoOutputStream. @@ -2077,7 +2099,7 @@ hadoop.security.java.secure.random.algorithm SHA1PRNG - The java secure random algorithm. + The java secure random algorithm. @@ -2085,7 +2107,7 @@ hadoop.security.secure.random.impl - Implementation of secure random. + Implementation of secure random. @@ -2156,7 +2178,7 @@ 0 The maximum number of concurrent connections a server is allowed to accept. If this limit is exceeded, incoming connections will first fill - the listen queue and then may go to an OS-specific listen overflow queue. + the listen queue and then may go to an OS-specific listen overflow queue. The client may fail or timeout, but the server can avoid running out of file descriptors using this feature. 0 means no limit. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index cee3269a86..d4ec2d6d29 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -22,6 +22,7 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AnonymousAWSCredentials; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -151,6 +152,16 @@ public AWSCredentials getCredentials() { } + /** + * Returns the underlying list of providers. + * + * @return providers + */ + @VisibleForTesting + List getProviders() { + return providers; + } + /** * Verify that the provider list is not empty. * @throws AmazonClientException if there are no providers. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index c89f6904cd..f926f342a9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -40,6 +40,9 @@ import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; import java.net.URI; import java.nio.file.AccessDeniedException; import java.util.Date; @@ -66,6 +69,8 @@ public final class S3AUtils { = "instantiation exception"; static final String NOT_AWS_PROVIDER = "does not implement AWSCredentialsProvider"; + static final String ABSTRACT_PROVIDER = + "is abstract and therefore cannot be created"; static final String ENDPOINT_KEY = "Endpoint"; private S3AUtils() { @@ -305,9 +310,15 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet( credentials.add(new BasicAWSCredentialsProvider( creds.getUser(), creds.getPassword())); credentials.add(new EnvironmentVariableCredentialsProvider()); - credentials.add(new InstanceProfileCredentialsProvider()); + credentials.add( + SharedInstanceProfileCredentialsProvider.getInstance()); } else { for (Class aClass : awsClasses) { + if (aClass == InstanceProfileCredentialsProvider.class) { + LOG.debug("Found {}, but will use {} instead.", aClass.getName(), + SharedInstanceProfileCredentialsProvider.class.getName()); + aClass = SharedInstanceProfileCredentialsProvider.class; + } credentials.add(createAWSCredentialProvider(conf, aClass, fsURI)); @@ -317,7 +328,19 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet( } /** - * Create an AWS credential provider. + * Create an AWS credential provider from its class by using reflection. 
The + * class must implement one of the following means of construction, which are + * attempted in order: + *
+ * <ol>
+ * <li>a public constructor accepting java.net.URI and + * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public static method named getInstance that accepts no + * arguments and returns an instance of + * com.amazonaws.auth.AWSCredentialsProvider, or</li>
+ * <li>a public default constructor.</li>
+ * </ol>
+ * * @param conf configuration * @param credClass credential class * @param uri URI of the FS @@ -328,32 +351,54 @@ static AWSCredentialsProvider createAWSCredentialProvider( Configuration conf, Class credClass, URI uri) throws IOException { - AWSCredentialsProvider credentials; + AWSCredentialsProvider credentials = null; String className = credClass.getName(); if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) { throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER); } - try { - LOG.debug("Credential provider class is {}", className); - try { - credentials = - (AWSCredentialsProvider) credClass.getDeclaredConstructor( - URI.class, Configuration.class).newInstance(uri, conf); - } catch (NoSuchMethodException | SecurityException e) { - credentials = - (AWSCredentialsProvider) credClass.getDeclaredConstructor() - .newInstance(); - } - } catch (NoSuchMethodException | SecurityException e) { - throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION - +". 
A class specified in %s must provide an accessible constructor " - + "accepting URI and Configuration, or an accessible default " - + "constructor.", className, AWS_CREDENTIALS_PROVIDER), e); - } catch (ReflectiveOperationException | IllegalArgumentException e) { - throw new IOException(className + " " + INSTANTIATION_EXCEPTION +".", e); + if (Modifier.isAbstract(credClass.getModifiers())) { + throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); + } + LOG.debug("Credential provider class is {}", className); + + try { + // new X(uri, conf) + Constructor cons = getConstructor(credClass, URI.class, + Configuration.class); + if (cons != null) { + credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf); + return credentials; + } + + // X.getInstance() + Method factory = getFactoryMethod(credClass, AWSCredentialsProvider.class, + "getInstance"); + if (factory != null) { + credentials = (AWSCredentialsProvider)factory.invoke(null); + return credentials; + } + + // new X() + cons = getConstructor(credClass); + if (cons != null) { + credentials = (AWSCredentialsProvider)cons.newInstance(); + return credentials; + } + + // no supported constructor or factory method found + throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION + + ". 
A class specified in %s must provide a public constructor " + + "accepting URI and Configuration, or a public factory method named " + + "getInstance that accepts no arguments, or a public default " + + "constructor.", className, AWS_CREDENTIALS_PROVIDER)); + } catch (ReflectiveOperationException | IllegalArgumentException e) { + // supported constructor or factory method found, but the call failed + throw new IOException(className + " " + INSTANTIATION_EXCEPTION +".", e); + } finally { + if (credentials != null) { + LOG.debug("Using {} for {}.", credentials, uri); + } } - LOG.debug("Using {} for {}.", credentials, uri); - return credentials; } /** @@ -499,4 +544,47 @@ public static int ensureOutputParameterInRange(String name, long size) { return (int)size; } } + + /** + * Returns the public constructor of {@code cl} specified by the list of + * {@code args} or {@code null} if {@code cl} has no public constructor that + * matches that specification. + * @param cl class + * @param args constructor argument types + * @return constructor or null + */ + private static Constructor getConstructor(Class cl, Class... args) { + try { + Constructor cons = cl.getDeclaredConstructor(args); + return Modifier.isPublic(cons.getModifiers()) ? cons : null; + } catch (NoSuchMethodException | SecurityException e) { + return null; + } + } + + /** + * Returns the public static method of {@code cl} that accepts no arguments + * and returns {@code returnType} specified by {@code methodName} or + * {@code null} if {@code cl} has no public static method that matches that + * specification. 
+ * @param cl class + * @param returnType return type + * @param methodName method name + * @return method or null + */ + private static Method getFactoryMethod(Class cl, Class returnType, + String methodName) { + try { + Method m = cl.getDeclaredMethod(methodName); + if (Modifier.isPublic(m.getModifiers()) && + Modifier.isStatic(m.getModifiers()) && + returnType.isAssignableFrom(m.getReturnType())) { + return m; + } else { + return null; + } + } catch (NoSuchMethodException | SecurityException e) { + return null; + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceProfileCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceProfileCredentialsProvider.java new file mode 100644 index 0000000000..cbc07873f0 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceProfileCredentialsProvider.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import com.amazonaws.auth.InstanceProfileCredentialsProvider; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A subclass of {@link InstanceProfileCredentialsProvider} that enforces + * instantiation of only a single instance. + * This credential provider calls the EC2 instance metadata service to obtain + * credentials. For highly multi-threaded applications, it's possible that + * multiple instances call the service simultaneously and overwhelm it with + * load. The service handles this by throttling the client with an HTTP 429 + * response or forcibly terminating the connection. Forcing use of a single + * instance reduces load on the metadata service by allowing all threads to + * share the credentials. The base class is thread-safe, and there is nothing + * that varies in the credentials across different instances of + * {@link S3AFileSystem} connecting to different buckets, so sharing a singleton + * instance is safe. + * + * As of AWS SDK 1.11.39, the SDK code internally enforces a singleton. After + * Hadoop upgrades to that version or higher, it's likely that we can remove + * this class. + */ +@InterfaceAudience.Private +@InterfaceStability.Stable +public final class SharedInstanceProfileCredentialsProvider + extends InstanceProfileCredentialsProvider { + + private static final SharedInstanceProfileCredentialsProvider INSTANCE = + new SharedInstanceProfileCredentialsProvider(); + + /** + * Returns the singleton instance. + * + * @return singleton instance + */ + public static SharedInstanceProfileCredentialsProvider getInstance() { + return INSTANCE; + } + + /** + * Default constructor, defined explicitly as private to enforce singleton. 
+ */ + private SharedInstanceProfileCredentialsProvider() { + super(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index c23e782d38..c0d9157fc1 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -272,15 +272,37 @@ of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. com.amazonaws.auth.AWSCredentialsProvider. These are loaded and queried in sequence for a valid set of credentials. - Each listed class must provide either an accessible constructor accepting - java.net.URI and org.apache.hadoop.conf.Configuration, or an accessible - default constructor. + Each listed class must implement one of the following means of + construction, which are attempted in order: + 1. a public constructor accepting java.net.URI and + org.apache.hadoop.conf.Configuration, + 2. a public static method named getInstance that accepts no + arguments and returns an instance of + com.amazonaws.auth.AWSCredentialsProvider, or + 3. a public default constructor. Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows anonymous access to a publicly accessible S3 bucket without any credentials. Please note that allowing anonymous access to an S3 bucket compromises security and therefore is unsuitable for most use cases. It can be useful for accessing public data sets without requiring AWS credentials. + + If unspecified, then the default list of credential provider classes, + queried in sequence, is: + 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports + static configuration of AWS access key ID and secret access key. + See also fs.s3a.access.key and fs.s3a.secret.key. + 2. 
com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports + configuration of AWS access key ID and secret access key in + environment variables named AWS_ACCESS_KEY_ID and + AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK. + 3. org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider: + a shared instance of + com.amazonaws.auth.InstanceProfileCredentialsProvider from the AWS + SDK, which supports use of instance profile credentials if running + in an EC2 VM. Using this shared instance potentially reduces load + on the EC2 instance metadata service for multi-threaded + applications. @@ -353,12 +375,13 @@ AWS Credential Providers are classes which can be used by the Amazon AWS SDK to obtain an AWS login from a different source in the system, including environment variables, JVM properties and configuration files. -There are three AWS Credential Providers inside the `hadoop-aws` JAR: +There are four AWS Credential Providers inside the `hadoop-aws` JAR: | classname | description | |-----------|-------------| | `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider`| Session Credentials | | `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider`| Simple name/secret credentials | +| `org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider`| Shared instance of EC2 Metadata Credentials, which can reduce load on the EC2 instance metadata service. (See below.) 
| | `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider`| Anonymous Login | There are also many in the Amazon SDKs, in particular two which are automatically @@ -370,6 +393,25 @@ set up in the authentication chain: | `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`| AWS Environment Variables | +*EC2 Metadata Credentials with `SharedInstanceProfileCredentialsProvider`* + +Applications running in EC2 may associate an IAM role with the VM and query the +[EC2 Instance Metadata Service](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) +for credentials to access S3. Within the AWS SDK, this functionality is +provided by `InstanceProfileCredentialsProvider`. Heavily multi-threaded +applications may trigger a high volume of calls to the instance metadata service +and trigger throttling: either an HTTP 429 response or a forcible close of the +connection. + +To mitigate against this problem, `hadoop-aws` ships with a variant of +`InstanceProfileCredentialsProvider` called +`SharedInstanceProfileCredentialsProvider`. Using this ensures that all +instances of S3A reuse the same instance profile credentials instead of issuing +a large volume of redundant metadata service calls. If +`fs.s3a.aws.credentials.provider` refers to +`com.amazonaws.auth.InstanceProfileCredentialsProvider`, S3A automatically uses +`org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider` instead. 
+ *Session Credentials with `TemporaryAWSCredentialsProvider`* [Temporary Security Credentials](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html) @@ -468,7 +510,7 @@ This means that the default S3A authentication chain can be defined as org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, com.amazonaws.auth.EnvironmentVariableCredentialsProvider, - com.amazonaws.auth.InstanceProfileCredentialsProvider + org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index cf8783cd34..819d9d8002 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -28,7 +28,6 @@ import org.apache.hadoop.fs.Path; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.junit.rules.Timeout; import com.amazonaws.auth.AWSCredentials; @@ -41,12 +40,10 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.junit.Assert.*; /** - * Tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. - * + * Integration tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. */ public class ITestS3AAWSCredentialsProvider { private static final Logger LOG = @@ -55,21 +52,6 @@ public class ITestS3AAWSCredentialsProvider { @Rule public Timeout testTimeout = new Timeout(1 * 60 * 1000); - @Rule - public ExpectedException exception = ExpectedException.none(); - - /** - * Declare what exception to raise, and the text which must be found - * in it. 
- * @param exceptionClass class of exception - * @param text text in exception - */ - private void expectException(Class exceptionClass, - String text) { - exception.expect(exceptionClass); - exception.expectMessage(text); - } - @Test public void testBadConfiguration() throws IOException { Configuration conf = new Configuration(); @@ -154,97 +136,4 @@ public void testAnonymousProvider() throws Exception { assertNotNull(stat); assertEquals(testFile, stat.getPath()); } - - /** - * A credential provider whose constructor signature doesn't match. - */ - static class ConstructorSignatureErrorProvider - implements AWSCredentialsProvider { - - @SuppressWarnings("unused") - public ConstructorSignatureErrorProvider(String str) { - } - - @Override - public AWSCredentials getCredentials() { - return null; - } - - @Override - public void refresh() { - } - } - - /** - * A credential provider whose constructor raises an NPE. - */ - static class ConstructorFailureProvider - implements AWSCredentialsProvider { - - @SuppressWarnings("unused") - public ConstructorFailureProvider() { - throw new NullPointerException("oops"); - } - - @Override - public AWSCredentials getCredentials() { - return null; - } - - @Override - public void refresh() { - } - } - - @Test - public void testProviderWrongClass() throws Exception { - expectProviderInstantiationFailure(this.getClass().getName(), - NOT_AWS_PROVIDER); - } - - @Test - public void testProviderNotAClass() throws Exception { - expectProviderInstantiationFailure("NoSuchClass", - "ClassNotFoundException"); - } - - private void expectProviderInstantiationFailure(String option, - String expectedErrorText) throws IOException { - Configuration conf = new Configuration(); - conf.set(AWS_CREDENTIALS_PROVIDER, option); - Path testFile = new Path( - conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); - expectException(IOException.class, expectedErrorText); - URI uri = testFile.toUri(); - S3AUtils.createAWSCredentialProviderSet(uri, conf, 
uri); - } - - @Test - public void testProviderConstructorError() throws Exception { - expectProviderInstantiationFailure( - ConstructorSignatureErrorProvider.class.getName(), - CONSTRUCTOR_EXCEPTION); - } - - @Test - public void testProviderFailureError() throws Exception { - expectProviderInstantiationFailure( - ConstructorFailureProvider.class.getName(), - INSTANTIATION_EXCEPTION); - } - - @Test - public void testInstantiationChain() throws Throwable { - Configuration conf = new Configuration(); - conf.set(AWS_CREDENTIALS_PROVIDER, - TemporaryAWSCredentialsProvider.NAME - + ", \t" + SimpleAWSCredentialsProvider.NAME - + " ,\n " + AnonymousAWSCredentialsProvider.NAME); - Path testFile = new Path( - conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); - - URI uri = testFile.toUri(); - S3AUtils.createAWSCredentialProviderSet(uri, conf, uri); - } - } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 19dccac15b..809c6e32f6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.net.URI; +import java.util.List; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; @@ -36,7 +37,7 @@ /** * Utilities for the S3A tests. 
*/ -public class S3ATestUtils { +public final class S3ATestUtils { /** * Value to set a system property to (in maven) to declare that @@ -130,7 +131,7 @@ public static FileContext createTestFileContext(Configuration conf) throw new AssumptionViolatedException("No test filesystem in " + TEST_FS_S3A_NAME); } - FileContext fc = FileContext.getFileContext(testURI,conf); + FileContext fc = FileContext.getFileContext(testURI, conf); return fc; } @@ -446,7 +447,7 @@ public boolean diffLessThanOrEquals(MetricDiff that) { } /** - * Get the statistic + * Get the statistic. * @return the statistic */ public Statistic getStatistic() { @@ -461,4 +462,39 @@ public long getStartingValue() { return startingValue; } } + + /** + * Asserts that {@code obj} is an instance of {@code expectedClass} using a + * descriptive assertion message. + * @param expectedClass class + * @param obj object to check + */ + public static void assertInstanceOf(Class expectedClass, Object obj) { + Assert.assertTrue(String.format("Expected instance of class %s, but is %s.", + expectedClass, obj.getClass()), + expectedClass.isAssignableFrom(obj.getClass())); + } + + /** + * Builds a comma-separated list of class names. + * @param classes list of classes + * @return comma-separated list of class names + */ + public static <T extends Class<?>> String buildClassListString( + List<T> classes) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < classes.size(); ++i) { + if (i > 0) { + sb.append(','); + } + sb.append(classes.get(i).getName()); + } + return sb.toString(); + } + + /** + * This class should not be instantiated.
+ */ + private S3ATestUtils() { + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java new file mode 100644 index 0000000000..c29d7254b8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.junit.Assert.*; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +/** + * Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. + */ +public class TestS3AAWSCredentialsProvider { + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void testProviderWrongClass() throws Exception { + expectProviderInstantiationFailure(this.getClass().getName(), + NOT_AWS_PROVIDER); + } + + @Test + public void testProviderAbstractClass() throws Exception { + expectProviderInstantiationFailure(AbstractProvider.class.getName(), + ABSTRACT_PROVIDER); + } + + @Test + public void testProviderNotAClass() throws Exception { + expectProviderInstantiationFailure("NoSuchClass", + "ClassNotFoundException"); + } + + @Test + public void testProviderConstructorError() throws Exception { + expectProviderInstantiationFailure( + ConstructorSignatureErrorProvider.class.getName(), + CONSTRUCTOR_EXCEPTION); + } + + @Test + public void testProviderFailureError() throws Exception { + expectProviderInstantiationFailure( + ConstructorFailureProvider.class.getName(), + INSTANTIATION_EXCEPTION); + } + + @Test + public void testInstantiationChain() throws Throwable { + Configuration conf = new Configuration(); + 
conf.set(AWS_CREDENTIALS_PROVIDER, + TemporaryAWSCredentialsProvider.NAME + + ", \t" + SimpleAWSCredentialsProvider.NAME + + " ,\n " + AnonymousAWSCredentialsProvider.NAME); + Path testFile = new Path( + conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); + + URI uri = testFile.toUri(); + AWSCredentialProviderList list = S3AUtils.createAWSCredentialProviderSet( + uri, conf, uri); + List<Class<? extends AWSCredentialsProvider>> expectedClasses = + Arrays.asList( + TemporaryAWSCredentialsProvider.class, + SimpleAWSCredentialsProvider.class, + AnonymousAWSCredentialsProvider.class); + assertCredentialProviders(expectedClasses, list); + } + + @Test + public void testDefaultChain() throws Exception { + URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); + Configuration conf = new Configuration(); + AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet( + uri1, conf, uri1); + AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet( + uri2, conf, uri2); + List<Class<? extends AWSCredentialsProvider>> expectedClasses = + Arrays.asList( + BasicAWSCredentialsProvider.class, + EnvironmentVariableCredentialsProvider.class, + SharedInstanceProfileCredentialsProvider.class); + assertCredentialProviders(expectedClasses, list1); + assertCredentialProviders(expectedClasses, list2); + assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(2), + list2.getProviders().get(2)); + } + + @Test + public void testConfiguredChain() throws Exception { + URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); + Configuration conf = new Configuration(); + List<Class<? extends AWSCredentialsProvider>> expectedClasses = + Arrays.asList( + EnvironmentVariableCredentialsProvider.class, + SharedInstanceProfileCredentialsProvider.class, + AnonymousAWSCredentialsProvider.class); + conf.set(AWS_CREDENTIALS_PROVIDER, buildClassListString(expectedClasses)); + AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet( + uri1, conf, uri1); + AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet( +
uri2, conf, uri2); + assertCredentialProviders(expectedClasses, list1); + assertCredentialProviders(expectedClasses, list2); + assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(1), + list2.getProviders().get(1)); + } + + @Test + public void testConfiguredChainUsesSharedInstanceProfile() throws Exception { + URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); + Configuration conf = new Configuration(); + List<Class<? extends AWSCredentialsProvider>> expectedClasses = + Arrays.<Class<? extends AWSCredentialsProvider>>asList( + InstanceProfileCredentialsProvider.class); + conf.set(AWS_CREDENTIALS_PROVIDER, buildClassListString(expectedClasses)); + AWSCredentialProviderList list1 = S3AUtils.createAWSCredentialProviderSet( + uri1, conf, uri1); + AWSCredentialProviderList list2 = S3AUtils.createAWSCredentialProviderSet( + uri2, conf, uri2); + assertCredentialProviders(expectedClasses, list1); + assertCredentialProviders(expectedClasses, list2); + assertSameInstanceProfileCredentialsProvider(list1.getProviders().get(0), + list2.getProviders().get(0)); + } + + /** + * A credential provider declared as abstract, so it cannot be instantiated. + */ + static abstract class AbstractProvider implements AWSCredentialsProvider { + } + + /** + * A credential provider whose constructor signature doesn't match. + */ + static class ConstructorSignatureErrorProvider + implements AWSCredentialsProvider { + + @SuppressWarnings("unused") + public ConstructorSignatureErrorProvider(String str) { + } + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + } + } + + /** + * A credential provider whose constructor raises an NPE.
+ */ + static class ConstructorFailureProvider + implements AWSCredentialsProvider { + + @SuppressWarnings("unused") + public ConstructorFailureProvider() { + throw new NullPointerException("oops"); + } + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + } + } + + /** + * Declare what exception to raise, and the text which must be found + * in it. + * @param exceptionClass class of exception + * @param text text in exception + */ + private void expectException(Class exceptionClass, + String text) { + exception.expect(exceptionClass); + exception.expectMessage(text); + } + + private void expectProviderInstantiationFailure(String option, + String expectedErrorText) throws IOException { + Configuration conf = new Configuration(); + conf.set(AWS_CREDENTIALS_PROVIDER, option); + Path testFile = new Path( + conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); + expectException(IOException.class, expectedErrorText); + URI uri = testFile.toUri(); + S3AUtils.createAWSCredentialProviderSet(uri, conf, uri); + } + + /** + * Asserts expected provider classes in list. 
+ * @param expectedClasses expected provider classes + * @param list providers to check + */ + private static void assertCredentialProviders( + List<Class<? extends AWSCredentialsProvider>> expectedClasses, + AWSCredentialProviderList list) { + assertNotNull(list); + List<AWSCredentialsProvider> providers = list.getProviders(); + assertEquals(expectedClasses.size(), providers.size()); + for (int i = 0; i < expectedClasses.size(); ++i) { + Class<? extends AWSCredentialsProvider> expectedClass = + expectedClasses.get(i); + AWSCredentialsProvider provider = providers.get(i); + assertNotNull( + String.format("At position %d, expected class is %s, but found null.", + i, expectedClass), provider); + assertTrue( + String.format("At position %d, expected class is %s, but found %s.", + i, expectedClass, provider.getClass()), + expectedClass.isAssignableFrom(provider.getClass())); + } + } + + /** + * Asserts that two different references point to the same shared instance of + * InstanceProfileCredentialsProvider using a descriptive assertion message. + * @param provider1 provider to check + * @param provider2 provider to check + */ + private static void assertSameInstanceProfileCredentialsProvider( + AWSCredentialsProvider provider1, AWSCredentialsProvider provider2) { + assertNotNull(provider1); + assertInstanceOf(InstanceProfileCredentialsProvider.class, provider1); + assertNotNull(provider2); + assertInstanceOf(InstanceProfileCredentialsProvider.class, provider2); + assertSame("Expected all usage of InstanceProfileCredentialsProvider to " + + "share a singleton instance, but found unique instances.", + provider1, provider2); + } +}