From c3f35c422bbb7fe9c8e6509063896de549b127d1 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 13 Sep 2017 16:57:50 -0700 Subject: [PATCH] HADOOP-14089. Automated checking for malformed client. Contributed by Sean Busbey. --- .../hadoop-client-api/pom.xml | 27 ++++++ .../hadoop-client-check-invariants/pom.xml | 74 ++++++++++++++++- .../ensure-jars-have-correct-contents.sh | 82 +++++++++++++++++++ .../pom.xml | 76 ++++++++++++++++- .../ensure-jars-have-correct-contents.sh | 70 ++++++++++++++++ .../hadoop-client-minicluster/pom.xml | 61 ++++++++++++-- .../hadoop-client-runtime/pom.xml | 50 +++++++++++ .../hadoop-mapreduce-client-shuffle/pom.xml | 7 ++ 8 files changed, 438 insertions(+), 9 deletions(-) create mode 100644 hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh create mode 100644 hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index a50ffedd8a..edbddc9ff1 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -182,6 +182,21 @@ io/serializations + + + javax/el/ + ${shaded.dependency.prefix}.javax.el. + + **/pom.xml + + + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/servlet/ ${shaded.dependency.prefix}.javax.servlet. @@ -189,6 +204,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + net/ ${shaded.dependency.prefix}.net. @@ -199,6 +221,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. 
+ diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml index 75f4d198a3..c6a8552765 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml @@ -25,7 +25,13 @@ 3.1.0-SNAPSHOT pom - Enforces our invariants for the api and runtime client modules. + + Enforces our invariants for the api and runtime client modules. + E.g. that modules have a specific set of transitive dependencies + and shaded artifacts only contain classes that are in particular + packages. Does the enforcement through the maven-enforcer-plugin + and an integration test. + Apache Hadoop Client Packaging Invariants @@ -82,6 +88,8 @@ commons-logging:commons-logging log4j:log4j + + com.google.code.findbugs:jsr305 @@ -97,7 +105,6 @@ - + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + true + hadoop-client-artifacts + + + + + + + org.codehaus.mojo + exec-maven-plugin + + + check-jar-contents + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + ${hadoop-client-artifacts} + + + + +
diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
new file mode 100644
index 0000000000..84efe7e08c
--- /dev/null
+++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar]
+#
+# accepts a single command line argument with a colon separated list of
+# paths to jars to check. Iterates through each such passed jar and checks
+# all the contained paths to make sure they follow the below constructed
+# safe list.
+#
+# Exits non-zero when any jar contains an unexpected path or when the
+# contents of a jar cannot be listed.
+
+# we have to allow the directories that lead to the org/apache/hadoop dir
+allowed_expr="(^org/$|^org/apache/$"
+# We allow the following things to exist in our client artifacts:
+# * classes in packages that start with org.apache.hadoop, which by
+# convention should be in a path that looks like org/apache/hadoop
+allowed_expr+="|^org/apache/hadoop/"
+# * whatever in the "META-INF" directory
+allowed_expr+="|^META-INF/"
+# * whatever under the "webapps" directory; for things shipped by yarn
+allowed_expr+="|^webapps/"
+# * Hadoop's default configuration files, which have the form
+# "_module_-default.xml"
+allowed_expr+="|^[^-]*-default\.xml$"
+# * Hadoop's versioning properties files, which have the form
+# "_module_-version-info.properties"
+allowed_expr+="|^[^-]*-version-info\.properties$"
+# * Hadoop's application classloader properties file.
+allowed_expr+="|^org\.apache\.hadoop\.application-classloader\.properties$"
+# public suffix list used by httpcomponents
+allowed_expr+="|^mozilla/$"
+allowed_expr+="|^mozilla/public-suffix-list\.txt$"
+# Comes from commons-configuration, not sure if relocatable.
+allowed_expr+="|^properties\.dtd$"
+allowed_expr+="|^PropertyList-1\.0\.dtd$"
+# Comes from Ehcache, not relocatable at top level due to limitation
+# of shade plugin AFAICT
+allowed_expr+="|^ehcache-core\.xsd$"
+allowed_expr+="|^ehcache-107ext\.xsd$"
+# Comes from kerby's kerb-simplekdc, not relocatable since at top level
+allowed_expr+="|^krb5-template\.conf$"
+allowed_expr+="|^krb5_udp-template\.conf$"
+# Jetty uses this style sheet for directory listings. TODO ensure our
+# internal use of jetty disallows directory listings and remove this.
+allowed_expr+="|^jetty-dir\.css$"
+
+allowed_expr+=")"
+declare -i bad_artifacts=0
+declare -a bad_contents
+IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1")
+for artifact in "${artifact_list[@]}"; do
+  # List the jar up front so that a missing or unreadable artifact is
+  # reported instead of being mistaken for one with no bad entries.
+  if ! jar_listing="$(jar tf "${artifact}")"; then
+    echo "[ERROR] Unable to list the contents of artifact: '${artifact}'"
+    bad_artifacts=$((bad_artifacts + 1))
+    continue
+  fi
+  # Read one entry per line so that paths containing whitespace or glob
+  # characters are kept intact.
+  bad_contents=()
+  while IFS= read -r entry; do
+    if [ -n "${entry}" ]; then
+      bad_contents+=("${entry}")
+    fi
+  done < <(printf '%s\n' "${jar_listing}" | grep -v -E "${allowed_expr}")
+  if [ ${#bad_contents[@]} -gt 0 ]; then
+    echo "[ERROR] Found artifact with unexpected contents: '${artifact}'"
+    echo " Please check the following and either correct the build or update"
+    echo " the allowed list with reasoning."
+    echo ""
+    for bad_line in "${bad_contents[@]}"; do
+      echo " ${bad_line}"
+    done
+    bad_artifacts=$((bad_artifacts + 1))
+  else
+    echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'"
+  fi
+done
+
+if [ "${bad_artifacts}" -gt 0 ]; then
+  exit 1
+fi
diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml index d0d071279b..691b545599 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml @@ -25,7 +25,13 @@ 3.1.0-SNAPSHOT pom - Enforces our invariants for the testing client modules. + + Enforces our invariants for the test client modules. + E.g. that modules have a specific set of transitive dependencies + and shaded artifacts only contain classes that are in particular + packages. Does the enforcement through the maven-enforcer-plugin + and an integration test. + Apache Hadoop Client Packaging Invariants for Test @@ -90,6 +96,8 @@ junit:junit org.hamcrest:hamcrest-core + + com.google.code.findbugs:jsr305 @@ -105,7 +113,6 @@ - + + org.apache.maven.plugins + maven-dependency-plugin + + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + + hadoop-client-api,hadoop-client-runtime + true + hadoop-client-artifacts + + + + + + + org.codehaus.mojo + exec-maven-plugin + + + check-jar-contents + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + ${hadoop-client-artifacts} + + + + + diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh new file mode 100644 index 0000000000..fb9f4f920b --- /dev/null +++ 
b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar]
+#
+# accepts a single command line argument with a colon separated list of
+# paths to jars to check. Iterates through each such passed jar and checks
+# all the contained paths to make sure they follow the below constructed
+# safe list.
+#
+# Exits non-zero when any jar contains an unexpected path or when the
+# contents of a jar cannot be listed.
+
+# we have to allow the directories that lead to the org/apache/hadoop dir
+allowed_expr="(^org/$|^org/apache/$"
+# We allow the following things to exist in our client artifacts:
+# * classes in packages that start with org.apache.hadoop, which by
+# convention should be in a path that looks like org/apache/hadoop
+allowed_expr+="|^org/apache/hadoop/"
+# * whatever in the "META-INF" directory
+allowed_expr+="|^META-INF/"
+# * whatever under the "webapps" directory; for minicluster UIs
+allowed_expr+="|^webapps/"
+# * Hadoop's default configuration files, which have the form
+# "_module_-default.xml"
+allowed_expr+="|^[^-]*-default\.xml$"
+# * Hadoop's versioning properties files, which have the form
+# "_module_-version-info.properties"
+allowed_expr+="|^[^-]*-version-info\.properties$"
+# * Hadoop's application classloader properties file.
+allowed_expr+="|^org\.apache\.hadoop\.application-classloader\.properties$"
+# * Used by JavaSandboxLinuxContainerRuntime as a default, loaded
+# from root, so can't relocate. :(
+allowed_expr+="|^java\.policy$"
+
+
+allowed_expr+=")"
+declare -i bad_artifacts=0
+declare -a bad_contents
+IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1")
+for artifact in "${artifact_list[@]}"; do
+  # List the jar up front so that a missing or unreadable artifact is
+  # reported instead of being mistaken for one with no bad entries.
+  if ! jar_listing="$(jar tf "${artifact}")"; then
+    echo "[ERROR] Unable to list the contents of artifact: '${artifact}'"
+    bad_artifacts=$((bad_artifacts + 1))
+    continue
+  fi
+  # Read one entry per line so that paths containing whitespace or glob
+  # characters are kept intact.
+  bad_contents=()
+  while IFS= read -r entry; do
+    if [ -n "${entry}" ]; then
+      bad_contents+=("${entry}")
+    fi
+  done < <(printf '%s\n' "${jar_listing}" | grep -v -E "${allowed_expr}")
+  if [ ${#bad_contents[@]} -gt 0 ]; then
+    echo "[ERROR] Found artifact with unexpected contents: '${artifact}'"
+    echo " Please check the following and either correct the build or update"
+    echo " the allowed list with reasoning."
+    echo ""
+    for bad_line in "${bad_contents[@]}"; do
+      echo " ${bad_line}"
+    done
+    bad_artifacts=$((bad_artifacts + 1))
+  else
+    echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'"
+  fi
+done
+
+if [ "${bad_artifacts}" -gt 0 ]; then
+  exit 1
+fi
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 681cb1c84e..0c8786697c 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -348,11 +348,6 @@ jersey-servlet true - - org.eclipse.jdt - core - true - net.sf.kosmosfs @@ -595,6 +590,7 @@ org.slf4j:slf4j-api commons-logging:commons-logging junit:junit + com.google.code.findbugs:jsr305 org.apache.hadoop:hadoop-yarn-server-timelineservice log4j:log4j @@ -656,6 +652,41 @@ org/hamcrest/*.class + + + org.glassfish.grizzly:grizzly-http-servlet + + catalog.cat + javaee_5.xsd + javaee_6.xsd + javaee_web_services_client_1_2.xsd + javaee_web_services_client_1_3.xsd + jsp_2_1.xsd + jsp_2_2.xsd + web-app_2_5.xsd + web-app_3_0.xsd + web-common_3_0.xsd + xml.xsd + + + + + org.eclipse.jetty:* + + about.html + + + + org.apache.hadoop:* + + + log4j.properties + container-log4j.properties + + capacity-scheduler.xml + krb5.conf + + @@ -738,6 +769,7 @@ **/pom.xml + javax/el/ ${shaded.dependency.prefix}.javax.el. @@ -745,6 +777,13 @@ **/pom.xml + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/inject/ ${shaded.dependency.prefix}.javax.inject. @@ -759,6 +798,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + jersey/ ${shaded.dependency.prefix}.jersey. @@ -776,6 +822,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. 
+ diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index 6879a23f08..7ed5ba7c07 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -94,6 +94,11 @@ commons-logging runtime + + com.google.code.findbugs + jsr305 + runtime + @@ -149,6 +154,9 @@ commons-logging:commons-logging log4j:log4j + + + com.google.code.findbugs:jsr305 @@ -181,6 +189,28 @@ META-INF/services/javax.* + + + org.apache.commons:commons-math3 + + assets/org/apache/commons/math3/**/* + + + + + org.eclipse.jetty:* + + about.html + + + + + org.apache.kerby:kerb-util + + keytab.txt + ccache.txt + + @@ -245,6 +275,7 @@ io/serializations + javax/el/ ${shaded.dependency.prefix}.javax.el. @@ -252,6 +283,13 @@ **/pom.xml + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/servlet/ ${shaded.dependency.prefix}.javax.servlet. @@ -259,6 +297,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + net/ ${shaded.dependency.prefix}.net. @@ -269,6 +314,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. + + + + com.microsoft.sqlserver + mssql-jdbc + + org.apache.hadoop