HADOOP-18938. S3A: Fix endpoint region parsing for vpc endpoints. (#6466)

Contributed by Shintaro Onuma
This commit is contained in:
Shintaro Onuma 2024-09-05 14:14:04 +01:00 committed by GitHub
parent 6c01490f14
commit 1f302e83fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 2 deletions

View File

@ -21,6 +21,8 @@
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
@ -85,6 +87,9 @@ public class DefaultS3ClientFactory extends Configured
private static final String S3_SERVICE_NAME = "s3"; private static final String S3_SERVICE_NAME = "s3";
private static final Pattern VPC_ENDPOINT_PATTERN =
Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$");
/** /**
* Subclasses refer to this. * Subclasses refer to this.
*/ */
@ -390,10 +395,19 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) {
* @param endpointEndsWithCentral true if the endpoint is configured as central. * @param endpointEndsWithCentral true if the endpoint is configured as central.
* @return the S3 region, null if unable to resolve from endpoint. * @return the S3 region, null if unable to resolve from endpoint.
*/ */
private static Region getS3RegionFromEndpoint(final String endpoint, @VisibleForTesting
static Region getS3RegionFromEndpoint(final String endpoint,
final boolean endpointEndsWithCentral) { final boolean endpointEndsWithCentral) {
if (!endpointEndsWithCentral) { if (!endpointEndsWithCentral) {
// S3 VPC endpoint parsing
Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
if (matcher.find()) {
LOG.debug("Mapping to VPCE");
LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1));
return Region.of(matcher.group(1));
}
LOG.debug("Endpoint {} is not the default; parsing", endpoint); LOG.debug("Endpoint {} is not the default; parsing", endpoint);
return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null);
} }

View File

@ -97,6 +97,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
private static final String CN_VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.cn-northwest-1.vpce.amazonaws.com.cn";
public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor";
/** /**
@ -294,7 +296,6 @@ public void testWithGovCloudEndpoint() throws Throwable {
} }
@Test @Test
@Ignore("Pending HADOOP-18938. S3A region logic to handle vpce and non standard endpoints")
public void testWithVPCE() throws Throwable { public void testWithVPCE() throws Throwable {
describe("Test with vpc endpoint"); describe("Test with vpc endpoint");
Configuration conf = getConfiguration(); Configuration conf = getConfiguration();
@ -304,6 +305,16 @@ public void testWithVPCE() throws Throwable {
expectInterceptorException(client); expectInterceptorException(client);
} }
@Test
public void testWithChinaVPCE() throws Throwable {
describe("Test with china vpc endpoint");
Configuration conf = getConfiguration();
S3Client client = createS3Client(conf, CN_VPC_ENDPOINT, null, CN_NORTHWEST_1, false);
expectInterceptorException(client);
}
@Test @Test
public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable {
describe("Access public bucket using central endpoint and region " describe("Access public bucket using central endpoint and region "

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import software.amazon.awssdk.regions.Region;
public class TestS3AEndpointParsing extends AbstractS3AMockTest {
private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
private static final String NON_VPC_ENDPOINT = "s3.eu-west-1.amazonaws.com";
private static final String US_WEST_2 = "us-west-2";
private static final String EU_WEST_1 = "eu-west-1";
@Test
public void testVPCEndpoint() {
Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(VPC_ENDPOINT, false);
Assertions.assertThat(region).isEqualTo(Region.of(US_WEST_2));
}
@Test
public void testNonVPCEndpoint() {
Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(NON_VPC_ENDPOINT, false);
Assertions.assertThat(region).isEqualTo(Region.of(EU_WEST_1));
}
}