From d3b98cb1b23841a57b966c5cedab312687f098cb Mon Sep 17 00:00:00 2001 From: HarshitGupta11 <50410275+HarshitGupta11@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:11:11 +0530 Subject: [PATCH] HADOOP-19194: Add test to find unshaded dependencies in the aws sdk (#6865) The new test TestAWSV2SDK scans the aws sdk bundle.jar and prints out all classes which are unshaded, and so at risk of creating classpath problems. It does not fail the test when unshaded classes are found, because the current SDKs do ship with unshaded classes; the test would always fail. The SDK upgrade process should include inspecting the output of this test to see if it has got worse (do a before/after check). Once the AWS SDK does shade everything, we can have this test fail on any regression. Contributed by Harshit Gupta --- .../site/markdown/tools/hadoop-aws/testing.md | 1 + .../apache/hadoop/fs/sdk/TestAWSV2SDK.java | 94 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 45d1c84765..7222eee98b 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -1184,6 +1184,7 @@ your IDE or via maven. 1. Run a full AWS-test suite with S3 client-side encryption enabled by setting `fs.s3a.encryption.algorithm` to 'CSE-KMS' and setting up AWS-KMS Key ID in `fs.s3a.encryption.key`. +2. Verify that the output of test `TestAWSV2SDK` doesn't contain any unshaded classes. 
The dependency chain of the `hadoop-aws` module should be similar to this, albeit with different version numbers: diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java new file mode 100644 index 0000000000..fca9fcc300 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.sdk; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +import org.junit.Test; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests to verify AWS SDK based issues like duplicated shaded classes and others. 
+ */ +public class TestAWSV2SDK extends AbstractHadoopTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(TestAWSV2SDK.class.getName()); + + @Test + public void testShadedClasses() throws IOException { + String allClassPath = System.getProperty("java.class.path"); + LOG.debug("Current classpath:{}", allClassPath); + String[] classPaths = allClassPath.split(File.pathSeparator); + String v2ClassPath = null; + for (String classPath : classPaths) { + //Checking for only version 2.x sdk here + if (classPath.contains("awssdk/bundle/2")) { + v2ClassPath = classPath; + break; + } + } + LOG.debug("AWS SDK V2 Classpath:{}", v2ClassPath); + assertThat(v2ClassPath) + .as("AWS V2 SDK should be present on the classpath").isNotNull(); + List listOfV2SdkClasses = getClassNamesFromJarFile(v2ClassPath); + String awsSdkPrefix = "software/amazon/awssdk"; + List unshadedClasses = new ArrayList<>(); + for (String awsSdkClass : listOfV2SdkClasses) { + if (!awsSdkClass.startsWith(awsSdkPrefix)) { + unshadedClasses.add(awsSdkClass); + } + } + if (!unshadedClasses.isEmpty()) { + LOG.warn("Unshaded Classes Found :{}", unshadedClasses.size()); + LOG.warn("List of unshaded classes:{}", unshadedClasses); + } else { + LOG.info("No Unshaded classes found in the sdk."); + } + } + + /** + * Returns the list of classes in a jar file. 
+ * @param jarFilePath: the location of the jar file from absolute path + * @return a list of classes contained by the jar file + * @throws IOException if the file is not present or the path is not readable + */ + private List getClassNamesFromJarFile(String jarFilePath) throws IOException { + List classNames = new ArrayList<>(); + try (JarFile jarFile = new JarFile(new File(jarFilePath))) { + Enumeration jarEntryEnumeration = jarFile.entries(); + while (jarEntryEnumeration.hasMoreElements()) { + JarEntry jarEntry = jarEntryEnumeration.nextElement(); + if (jarEntry.getName().endsWith(".class")) { + classNames.add(jarEntry.getName()); + } + } + } + return classNames; + } +}