From 5e7ce26e66ef94fac6b377fb8976cd01cfda86bb Mon Sep 17 00:00:00 2001 From: Petre Bogdan Stolojan Date: Fri, 4 Feb 2022 15:37:08 +0000 Subject: [PATCH] HADOOP-18085. S3 SDK Upgrade causes AccessPoint ARN endpoint mistranslation (#3902) Part of HADOOP-17198. Support S3 Access Points. HADOOP-18068. "upgrade AWS SDK to 1.12.132" broke the access point endpoint translation. Correct endpoints should start with "s3-accesspoint."; after the SDK upgrade they start with "s3.accesspoint-", which breaks tests and region detection by the SDK. Contributed by Bogdan Stolojan --- .../org/apache/hadoop/fs/s3a/ArnResource.java | 5 +- .../apache/hadoop/fs/s3a/TestArnResource.java | 53 +++++++++++++------ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 7c866ac967..0294f77229 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -21,12 +21,12 @@ import javax.annotation.Nonnull; import com.amazonaws.arn.Arn; -import com.amazonaws.regions.RegionUtils; /** * Represents an Arn Resource, this can be an accesspoint or bucket. */ public final class ArnResource { + private final static String ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; /** * Resource name. @@ -106,8 +106,7 @@ public String getFullArn() { * @return resource endpoint. 
*/ public String getEndpoint() { - return RegionUtils.getRegion(accessPointRegionKey) - .getServiceEndpoint("s3"); + return String.format(ACCESSPOINT_ENDPOINT_FORMAT, region); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 5ac47752ec..36381bf14b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -39,39 +39,43 @@ public class TestArnResource extends HadoopTestBase { private final static Logger LOG = LoggerFactory.getLogger(TestArnResource.class); + private final static String MOCK_ACCOUNT = "123456789101"; + @Test public void parseAccessPointFromArn() throws IllegalArgumentException { describe("Parse AccessPoint ArnResource from arn string"); String accessPoint = "testAp"; - String accountId = "123456789101"; String[][] regionPartitionEndpoints = new String[][] { - {Regions.EU_WEST_1.getName(), "aws", "eu-west-1.amazonaws.com"}, - {Regions.US_GOV_EAST_1.getName(), "aws-us-gov", - "us-gov-east-1.amazonaws.com"}, - {Regions.CN_NORTH_1.getName(), "aws-cn", "cn-north-1.amazonaws.com"}, + {Regions.EU_WEST_1.getName(), "aws"}, + {Regions.US_GOV_EAST_1.getName(), "aws-us-gov"}, + {Regions.CN_NORTH_1.getName(), "aws-cn"}, }; for (String[] testPair : regionPartitionEndpoints) { String region = testPair[0]; String partition = testPair[1]; - String endpoint = testPair[2]; - // arn:partition:service:region:account-id:resource-type/resource-id - String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId, - accessPoint); - - ArnResource resource = ArnResource.accessPointFromArn(arn); - assertEquals("Arn does not match", arn, resource.getFullArn()); + ArnResource resource = getArnResourceFrom(partition, region, MOCK_ACCOUNT, accessPoint); assertEquals("Access Point 
name does not match", accessPoint, resource.getName()); - assertEquals("Account Id does not match", accountId, resource.getOwnerAccountId()); + assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId()); assertEquals("Region does not match", region, resource.getRegion()); - Assertions.assertThat(resource.getEndpoint()) - .describedAs("Endpoint does not match") - .contains(endpoint); } } + @Test + public void makeSureEndpointHasTheCorrectFormat() { + // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the + // endpoints for the client are modified. This test makes sure endpoint is set up correctly. + ArnResource accessPoint = getArnResourceFrom("aws", "eu-west-1", MOCK_ACCOUNT, + "test"); + String expected = "s3-accesspoint.eu-west-1.amazonaws.com"; + + Assertions.assertThat(accessPoint.getEndpoint()) + .describedAs("Endpoint has invalid format. Access Point requests will not work") + .isEqualTo(expected); + } + @Test public void invalidARNsMustThrow() throws Exception { describe("Using an invalid ARN format must throw when initializing an ArnResource."); @@ -80,6 +84,23 @@ public void invalidARNsMustThrow() throws Exception { ArnResource.accessPointFromArn("invalid:arn:resource")); } + /** + * Create an {@link ArnResource} from string components + * @param partition - partition for ARN + * @param region - region for ARN + * @param accountId - accountId for ARN + * @param resourceName - ARN resource name + * @return ArnResource described by its properties + */ + private ArnResource getArnResourceFrom(String partition, String region, String accountId, + String resourceName) { + // arn:partition:service:region:account-id:resource-type/resource-id + String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId, + resourceName); + + return ArnResource.accessPointFromArn(arn); + } + private void describe(String message) { LOG.info(message); }