diff --git a/dev-support/bin/hadoop.sh b/dev-support/bin/hadoop.sh index beebea8c97..526b9d5a78 100755 --- a/dev-support/bin/hadoop.sh +++ b/dev-support/bin/hadoop.sh @@ -512,7 +512,7 @@ function shadedclient_initialize maven_add_install shadedclient } -## @description build client facing shaded artifacts and test them +## @description build client facing shaded and non-shaded artifacts and test them ## @audience private ## @stability evolving ## @param repostatus @@ -545,13 +545,20 @@ function shadedclient_rebuild return 0 fi - big_console_header "Checking client artifacts on ${repostatus}" + big_console_header "Checking client artifacts on ${repostatus} with shaded clients" echo_and_redirect "${logfile}" \ "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \ "${modules[@]}" \ -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true + big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients" + + echo_and_redirect "${logfile}" \ + "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \ + "${modules[@]}" \ + -Pnoshade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true + count=$("${GREP}" -c '\[ERROR\]' "${logfile}") if [[ ${count} -gt 0 ]]; then add_vote_table -1 shadedclient "${repostatus} has errors when building and testing our client artifacts." 
diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index 9a40aa3647..55e6cbe115 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -67,6 +67,13 @@ + + + org.xerial.snappy + snappy-java + @@ -109,6 +116,10 @@ org.apache.hadoop:* + + + org.xerial.snappy:* + @@ -149,6 +160,9 @@ org/xml/sax/**/* org/bouncycastle/* org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* @@ -225,6 +239,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/* diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml index 4bd256d4ef..187a1a36a0 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml @@ -92,6 +92,8 @@ com.google.code.findbugs:jsr305 org.bouncycastle:* + + org.xerial.snappy:* diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 7242ade356..2e927402d2 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -67,6 +67,8 @@ allowed_expr+="|^krb5_udp-template.conf$" # Jetty uses this style sheet for directory listings. TODO ensure our # internal use of jetty disallows directory listings and remove this. allowed_expr+="|^jetty-dir.css$" +# snappy-java is a native library. We cannot relocate it under org/apache/hadoop. 
+allowed_expr+="|^org/xerial/" allowed_expr+=")" declare -i bad_artifacts=0 diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml index b2a957e67f..7f75d1bd83 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml @@ -100,6 +100,8 @@ com.google.code.findbugs:jsr305 org.bouncycastle:* + + org.xerial.snappy:* diff --git a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml index 4b1c86d6ee..350c903ff0 100644 --- a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml +++ b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml @@ -52,6 +52,11 @@ junit test + + org.lz4 + lz4-java + test + diff --git a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java new file mode 100644 index 0000000000..fd0effa143 --- /dev/null +++ b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.hadoop.example; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.io.*; +import java.util.Arrays; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.RandomDatum; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.util.ReflectionUtils; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Ensure that we can perform codec operations given the API and runtime jars + * by performing some simple smoke tests. 
+ */
+public class ITUseHadoopCodecs {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ITUseHadoopCodecs.class);
+
+  /** Configuration handed to every codec created by these tests. */
+  private final Configuration hadoopConf = new Configuration();
+  /** Number of key/value records generated for the non-empty round trip. */
+  private final int dataCount = 100;
+  /** Fresh random seed per test instance; codecTest logs it so a failure can be replayed. */
+  private final int dataSeed = new Random().nextInt();
+
+  /**
+   * Round-trips empty and non-empty data through GzipCodec after switching
+   * the native zlib flag off.
+   */
+  @Test
+  public void testGzipCodec() throws IOException {
+    // NOTE(review): presumably this forces the pure-Java gzip path - confirm.
+    ZlibFactory.setNativeZlibLoaded(false);
+    assertFalse(ZlibFactory.isNativeZlibLoaded(hadoopConf));
+    codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.GzipCodec");
+    codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.GzipCodec");
+  }
+
+  /** Round-trips empty and non-empty data through SnappyCodec. */
+  @Test
+  public void testSnappyCodec() throws IOException {
+    codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.SnappyCodec");
+    codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.SnappyCodec");
+  }
+
+  /**
+   * Round-trips empty and non-empty data through Lz4Codec, once with the
+   * LZ4HC setting disabled and once with it enabled.
+   */
+  @Test
+  public void testLz4Codec() throws IOException {
+    // A plain loop lets IOException propagate directly instead of being
+    // wrapped in a RuntimeException inside a lambda.
+    for (boolean useLz4Hc : Arrays.asList(false, true)) {
+      hadoopConf.setBoolean(
+          CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
+          useLz4Hc);
+      codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
+      codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.Lz4Codec");
+    }
+  }
+
+  /**
+   * Compresses generated records with the given codec, decompresses them
+   * again and asserts that the result matches the input byte for byte.
+   *
+   * @param conf configuration used to load and instantiate the codec
+   * @param seed seed for the RandomDatum generator
+   * @param count number of key/value records to generate; 0 tests empty input
+   * @param codecClass fully qualified class name of the codec under test
+   * @throws IOException if the codec class cannot be found or stream I/O fails
+   */
+  private void codecTest(Configuration conf, int seed, int count, String codecClass)
+      throws IOException {
+
+    // Create the codec
+    final CompressionCodec codec;
+    try {
+      codec = (CompressionCodec)
+          ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
+    } catch (ClassNotFoundException cnfe) {
+      // Keep the original exception as the cause so the missing class is reported.
+      throw new IOException("Illegal codec!", cnfe);
+    }
+    LOG.info("Created a Codec object of type: {}", codecClass);
+
+    // Generate data
+    DataOutputBuffer data = new DataOutputBuffer();
+    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
+    for (int i = 0; i < count; ++i) {
+      generator.next();
+      RandomDatum key = generator.getKey();
+      RandomDatum value = generator.getValue();
+
+      key.write(data);
+      value.write(data);
+    }
+    LOG.info("Generated {} records with seed {}", count, seed);
+
+    // Compress data
+    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
+    try (CompressionOutputStream deflateFilter =
+        codec.createOutputStream(compressedDataBuffer);
+        DataOutputStream deflateOut =
+            new DataOutputStream(new BufferedOutputStream(deflateFilter))) {
+      deflateOut.write(data.getData(), 0, data.getLength());
+      // Flush the buffered wrapper first so finish() sees all of the input.
+      deflateOut.flush();
+      deflateFilter.finish();
+    }
+
+    // De-compress data
+    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
+    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
+        compressedDataBuffer.getLength());
+    DataInputBuffer originalData = new DataInputBuffer();
+    originalData.reset(data.getData(), 0, data.getLength());
+    try (CompressionInputStream inflateFilter =
+        codec.createInputStream(deCompressedDataBuffer);
+        DataInputStream originalIn =
+            new DataInputStream(new BufferedInputStream(originalData))) {
+
+      // Check: compare byte by byte, including the final -1 so that both
+      // streams are required to end at the same position.
+      int expected;
+      do {
+        expected = originalIn.read();
+        assertEquals("Inflated stream read by byte does not match",
+            expected, inflateFilter.read());
+      } while (expected != -1);
+    }
+
+    LOG.info("SUCCESS! Completed checking {} records", count);
+  }
+}
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 4314cab417..c9b2011055 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -40,6 +40,12 @@ hadoop-client-api runtime + + + org.xerial.snappy + snappy-java + runtime + org.apache.hadoop hadoop-client-runtime @@ -683,6 +689,8 @@ org.bouncycastle:* + + org.xerial.snappy:* @@ -886,6 +894,9 @@ org/xml/sax/**/* org/bouncycastle/* org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* @@ -1004,6 +1015,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/* diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index eeaaf40742..80dff3303e 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -60,6 +60,12 @@ hadoop-client-api runtime + + + org.xerial.snappy + snappy-java + runtime + @@ -163,6 +169,8 @@ org.ow2.asm:* org.bouncycastle:* + + org.xerial.snappy:* @@ -269,6 +277,9 @@ org/xml/sax/**/* org/bouncycastle/* org/bouncycastle/**/* + + org/xerial/snappy/* + org/xerial/snappy/**/* @@ -359,6 +370,9 @@ net/topology/* net/topology/**/* + + net/jpountz/* + net/jpountz/**/*