From 1f302e83fd93366544ccbe2bc5ee2de305e65cb6 Mon Sep 17 00:00:00 2001 From: Shintaro Onuma <31045635+shintaroonuma@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:14:04 +0100 Subject: [PATCH] HADOOP-18938. S3A: Fix endpoint region parsing for vpc endpoints. (#6466) Contributed by Shintaro Onuma --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 16 ++++++- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 13 +++++- .../hadoop/fs/s3a/TestS3AEndpointParsing.java | 43 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index ba9fc080c2..c52454ac15 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; @@ -85,6 +87,9 @@ public class DefaultS3ClientFactory extends Configured private static final String S3_SERVICE_NAME = "s3"; + private static final Pattern VPC_ENDPOINT_PATTERN = + Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$"); + /** * Subclasses refer to this. */ @@ -390,10 +395,19 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * @param endpointEndsWithCentral true if the endpoint is configured as central. * @return the S3 region, null if unable to resolve from endpoint. */ - private static Region getS3RegionFromEndpoint(final String endpoint, + @VisibleForTesting + static Region getS3RegionFromEndpoint(final String endpoint, final boolean endpointEndsWithCentral) { if (!endpointEndsWithCentral) { + // S3 VPC endpoint parsing + Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint); + if (matcher.find()) { + LOG.debug("Mapping to VPCE"); + LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1)); + return Region.of(matcher.group(1)); + } + LOG.debug("Endpoint {} is not the default; parsing", endpoint); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 8403b6bd6c..d06224df5b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -97,6 +97,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String CN_VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.cn-northwest-1.vpce.amazonaws.com.cn"; + public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; /** @@ -294,7 +296,6 @@ public void testWithGovCloudEndpoint() throws Throwable { } @Test - @Ignore("Pending HADOOP-18938. S3A region logic to handle vpce and non standard endpoints") public void testWithVPCE() throws Throwable { describe("Test with vpc endpoint"); Configuration conf = getConfiguration(); @@ -304,6 +305,16 @@ public void testWithVPCE() throws Throwable { expectInterceptorException(client); } + @Test + public void testWithChinaVPCE() throws Throwable { + describe("Test with china vpc endpoint"); + Configuration conf = getConfiguration(); + + S3Client client = createS3Client(conf, CN_VPC_ENDPOINT, null, CN_NORTHWEST_1, false); + + expectInterceptorException(client); + } + @Test public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { describe("Access public bucket using central endpoint and region " diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java new file mode 100644 index 0000000000..8a77c102ac --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import software.amazon.awssdk.regions.Region; + +public class TestS3AEndpointParsing extends AbstractS3AMockTest { + + private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String NON_VPC_ENDPOINT = "s3.eu-west-1.amazonaws.com"; + private static final String US_WEST_2 = "us-west-2"; + private static final String EU_WEST_1 = "eu-west-1"; + + @Test + public void testVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(US_WEST_2)); + } + + @Test + public void testNonVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(NON_VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(EU_WEST_1)); + } +}