HADOOP-19286: S3A: Support cross region access when S3 region/endpoint is set (#7067)

Adds a new option
   fs.s3a.cross.region.access.enabled
which is true by default.

This makes cross region access a separate configuration option, so that it can be enabled or disabled irrespective of whether a region/endpoint is set.

Contributed by Syed Shameerur Rahman
This commit is contained in:
Syed Shameerur Rahman 2024-10-02 00:41:11 +05:30 committed by GitHub
parent 4ff0dceebd
commit e9ed21c065
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 85 additions and 6 deletions

View File

@ -1372,6 +1372,19 @@ private Constants() {
*/
public static final String XA_HEADER_PREFIX = "header.";
/**
* Configuration option controlling whether S3 cross region access is
* enabled on the S3 client, i.e. whether buckets in a region other than
* the one given by the endpoint/region configuration may be accessed.
* Value: {@value}.
*/
public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED =
"fs.s3a.cross.region.access.enabled";
/**
* Default value of {@link #AWS_S3_CROSS_REGION_ACCESS_ENABLED}:
* cross region access is enabled unless explicitly switched off.
* Value: {@value}.
*/
public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true;
/**
* AWS S3 region for the bucket. When set bypasses the construction of
* region through endpoint url.

View File

@ -58,6 +58,8 @@
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@ -330,7 +332,6 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
builder.endpointOverride(endpoint);
LOG.debug("Setting endpoint to {}", endpoint);
} else {
builder.crossRegionAccessEnabled(true);
origin = "central endpoint with cross region access";
LOG.debug("Enabling cross region access for endpoint {}",
endpointStr);
@ -343,7 +344,6 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
// no region is configured, and none could be determined from the endpoint.
// Use US_EAST_2 as default.
region = Region.of(AWS_S3_DEFAULT_REGION);
builder.crossRegionAccessEnabled(true);
builder.region(region);
origin = "cross region access fallback";
} else if (configuredRegion.isEmpty()) {
@ -354,8 +354,14 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
LOG.debug(SDK_REGION_CHAIN_IN_USE);
origin = "SDK region chain";
}
LOG.debug("Setting region to {} from {}", region, origin);
boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT);
// s3 cross region access
if (isCrossRegionAccessEnabled) {
builder.crossRegionAccessEnabled(true);
}
LOG.debug("Setting region to {} from {} with cross region access {}",
region, origin, isCrossRegionAccessEnabled);
}
/**

View File

@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket
The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket.
The S3A connector supports S3 cross-region access via the AWS SDK; this is enabled by default. It allows users to access S3 buckets in a different region from the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. S3 cross-region access can be disabled by setting the following option:
```xml
<property>
<name>fs.s3a.cross.region.access.enabled</name>
<value>false</value>
<description>S3 cross region access</description>
</property>
```
Not supported:
* AWS [Snowball](https://aws.amazon.com/snowball/).

View File

@ -439,6 +439,7 @@ public void testCustomUserAgent() throws Exception {
@Test
public void testRequestTimeout() throws Exception {
conf = new Configuration();
skipIfCrossRegionClient(conf);
// remove the safety check on minimum durations.
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO);
try {
@ -632,8 +633,8 @@ public static boolean isSTSSignerCalled() {
*/
private static void skipIfCrossRegionClient(
Configuration configuration) {
if (configuration.get(ENDPOINT, null) == null
&& configuration.get(AWS_REGION, null) == null) {
if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) {
skip("Skipping test as cross region client is in use ");
}
}

View File

@ -44,8 +44,10 @@
import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
@ -71,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
private static final String US_WEST_2 = "us-west-2";
private static final String SA_EAST_1 = "sa-east-1";
private static final String EU_WEST_2 = "eu-west-2";
private static final String CN_NORTHWEST_1 = "cn-northwest-1";
@ -346,6 +350,41 @@ public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable {
assertRequesterPaysFileExistence(newConf);
}
/**
 * Verify that probing the test bucket fails with an
 * {@link AWSRedirectException} when the client is configured with the
 * region sa-east-1 and cross region access is disabled.
 * The test is skipped when the bucket region is unknown or is sa-east-1.
 * @throws Exception on any failure
 */
@Test
public void testWithOutCrossRegionAccess() throws Exception {
  describe("Verify cross region access fails when disabled");
  // skip the test if the bucket region is null or sa-east-1
  skipCrossRegionTest();
  final Configuration newConf = new Configuration(getConfiguration());
  // Drop any base and per-bucket settings of these options, as the
  // cross-region-enabled test does, so the values set below are
  // the ones the new filesystem actually uses.
  removeBaseAndBucketOverrides(newConf,
      AWS_S3_CROSS_REGION_ACCESS_ENABLED,
      AWS_REGION);
  // disable cross region access
  newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false);
  // use a region which differs from that of the bucket
  newConf.set(AWS_REGION, SA_EAST_1);
  try (S3AFileSystem fs = new S3AFileSystem()) {
    fs.initialize(getFileSystem().getUri(), newConf);
    intercept(AWSRedirectException.class,
        "does not match the AWS region containing the bucket",
        () -> fs.exists(getFileSystem().getWorkingDirectory()));
  }
}
/**
 * Verify that the test bucket can be probed without an exception when
 * the client is configured with the region sa-east-1 and cross region
 * access is explicitly enabled.
 * The test is skipped when the bucket region is unknown or is sa-east-1.
 * @throws Exception on any failure
 */
@Test
public void testWithCrossRegionAccess() throws Exception {
  describe("Verify cross region access succeed when enabled");
  // nothing to verify if the bucket region is null or sa-east-1
  skipCrossRegionTest();
  final Configuration crossRegionConf = new Configuration(getConfiguration());
  removeBaseAndBucketOverrides(crossRegionConf,
      AWS_S3_CROSS_REGION_ACCESS_ENABLED, AWS_REGION);
  // a region other than the bucket's, with cross region access switched on
  crossRegionConf.set(AWS_REGION, SA_EAST_1);
  crossRegionConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true);
  try (S3AFileSystem remoteFs = new S3AFileSystem()) {
    remoteFs.initialize(getFileSystem().getUri(), crossRegionConf);
    // only the absence of an exception matters; the result is ignored
    remoteFs.exists(getFileSystem().getWorkingDirectory());
  }
}
@Test
public void testCentralEndpointAndSameRegionAsBucket() throws Throwable {
describe("Access public bucket using central endpoint and region "
@ -478,6 +517,16 @@ public void testCentralEndpointAndNullRegionFipsWithCRUD() throws Throwable {
assertOpsUsingNewFs();
}
/**
 * Skip the calling test unless the region of the test bucket is known
 * and is something other than sa-east-1.
 * @throws IOException failure to retrieve the bucket metadata
 */
private void skipCrossRegionTest() throws IOException {
  final String bucketRegion =
      getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion();
  final boolean regionUsable =
      bucketRegion != null && !SA_EAST_1.equals(bucketRegion);
  if (!regionUsable) {
    skip("Skipping test since region is null or it is set to sa-east-1");
  }
}
private void assertOpsUsingNewFs() throws IOException {
final String file = getMethodName();
final Path basePath = methodPath();