diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index b3436da670..96b108f0f3 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -765,6 +765,13 @@ + + fs.s3a.path.style.access + Enable S3 path style access ie disabling the default virtual hosting behaviour. + Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. + + + fs.s3a.proxy.host Hostname of the (optional) proxy server for S3 connections. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index f10f3db60d..9d79623f71 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -28,13 +28,17 @@ public class Constants { // number of simultaneous connections to s3 public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum"; public static final int DEFAULT_MAXIMUM_CONNECTIONS = 15; - + // connect to s3 over ssl? public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled"; public static final boolean DEFAULT_SECURE_CONNECTIONS = true; //use a custom endpoint? public static final String ENDPOINT = "fs.s3a.endpoint"; + + //Enable path style access? Overrides default virtual hosting + public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access"; + //connect to s3 through a proxy server? 
public static final String PROXY_HOST = "fs.s3a.proxy.host"; public static final String PROXY_PORT = "fs.s3a.proxy.port"; @@ -50,7 +54,7 @@ public class Constants { // seconds until we give up trying to establish a connection to s3 public static final String ESTABLISH_TIMEOUT = "fs.s3a.connection.establish.timeout"; public static final int DEFAULT_ESTABLISH_TIMEOUT = 50000; - + // seconds until we give up on a connection to s3 public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout"; public static final int DEFAULT_SOCKET_TIMEOUT = 200000; @@ -74,7 +78,7 @@ public class Constants { // size of each of or multipart pieces in bytes public static final String MULTIPART_SIZE = "fs.s3a.multipart.size"; public static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB - + // minimum size in bytes before we start a multipart uploads or copy public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold"; public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index fe705cef83..97092ac03d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -38,6 +38,7 @@ import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.S3ClientOptions; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.DeleteObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest; @@ -244,6 +245,15 @@ private void initAmazonS3Client(Configuration conf, throw new IllegalArgumentException(msg, e); } } + enablePathStyleAccessIfRequired(conf); + } + + private void 
enablePathStyleAccessIfRequired(Configuration conf) { + final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false); + if (pathStyleAccess) { + LOG.debug("Enabling path style access!"); + s3.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true)); + } } private void initTransferManager() { diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 7382029f3a..e87b8849ff 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -224,6 +224,13 @@ this capability. + + fs.s3a.path.style.access + Enable S3 path style access ie disabling the default virtual hosting behaviour. + Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. + + + fs.s3a.proxy.host Hostname of the (optional) proxy server for S3 connections. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java index ae1539d4c8..4a0bfbbfc0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AConfiguration.java @@ -19,10 +19,14 @@ package org.apache.hadoop.fs.s3a; import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.S3ClientOptions; +import com.amazonaws.services.s3.model.AmazonS3Exception; + import org.apache.commons.lang.StringUtils; import com.amazonaws.AmazonClientException; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -30,17 +34,19 @@ import org.slf4j.LoggerFactory; import static 
org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.io.File; import java.net.URI; -import java.io.IOException; +import java.lang.reflect.Field; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; - +import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; public class TestS3AConfiguration { @@ -354,4 +360,39 @@ public void testExcludingS3ACredentialProvider() throws Exception { assertEquals("SecretKey incorrect.", "456", creds.getAccessSecret()); } + + @Test + public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() throws Exception { + + conf = new Configuration(); + conf.set(Constants.PATH_STYLE_ACCESS, Boolean.toString(true)); + assertTrue(conf.getBoolean(Constants.PATH_STYLE_ACCESS, false)); + + try { + fs = S3ATestUtils.createTestFileSystem(conf); + final Object object = getClientOptionsField(fs.getAmazonS3Client(), "clientOptions"); + assertNotNull(object); + assertTrue("Unexpected type found for clientOptions!", object instanceof S3ClientOptions); + assertTrue("Expected to find path style access to be switched on!", ((S3ClientOptions) object).isPathStyleAccess()); + byte[] file = ContractTestUtils.toAsciiByteArray("test file"); + ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true); + } catch (final AmazonS3Exception e) { + LOG.error("Caught exception: ", e); + // Catch/pass standard path style access behaviour when live bucket + // isn't in the same region as the s3 client default. 
See
+      // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
+      assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode());
+    }
+  }
+
+  private Object getClientOptionsField(AmazonS3Client s3client, String field)
+      throws NoSuchFieldException, IllegalAccessException {
+    final Field clientOptionsProps = s3client.getClass().getDeclaredField(field);
+    assertNotNull(clientOptionsProps);
+    if (!clientOptionsProps.isAccessible()) {
+      clientOptionsProps.setAccessible(true);
+    }
+    final Object object = clientOptionsProps.get(s3client);
+    return object;
+  }
 }