From 8f60a90688ea38e268ef6d6f12e4a52e0de018ec Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 6 Oct 2020 09:07:54 -0700 Subject: [PATCH] HADOOP-17125. Use snappy-java in SnappyCodec (#2297) This switches the SnappyCodec to use the snappy-java codec, rather than the native one. To use the codec, snappy-java.jar (from org.xerial.snappy) needs to be on the classpath. This comes in as an avro dependency, so it is already on the hadoop-common classpath, as well as in hadoop-common/lib. The version used is now managed in the hadoop-project POM; initially 1.1.7.7 Contributed by DB Tsai and Liang-Chi Hsieh Change-Id: Id52a404a0005480e68917cd17f0a27b7744aea4e --- BUILDING.txt | 5 +- dev-support/bin/dist-copynativelibs | 5 - hadoop-common-project/hadoop-common/pom.xml | 22 +-- .../hadoop-common/src/CMakeLists.txt | 29 --- .../hadoop/io/compress/Decompressor.java | 2 +- .../hadoop/io/compress/SnappyCodec.java | 40 +---- .../io/compress/snappy/SnappyCompressor.java | 39 ++-- .../compress/snappy/SnappyDecompressor.java | 42 ++--- .../apache/hadoop/util/NativeCodeLoader.java | 5 - .../hadoop/util/NativeLibraryChecker.java | 11 +- .../src/main/native/native.vcxproj | 24 +-- .../io/compress/snappy/SnappyCompressor.c | 166 ------------------ .../io/compress/snappy/SnappyDecompressor.c | 133 -------------- .../org_apache_hadoop_io_compress_snappy.h | 33 ---- .../src/site/markdown/NativeLibraries.md.vm | 3 +- .../io/compress/CompressDecompressTester.java | 30 +--- .../apache/hadoop/io/compress/TestCodec.java | 8 +- .../TestSnappyCompressorDecompressor.java | 58 +++++- .../hadoop/util/TestNativeCodeLoader.java | 4 - .../hadoop-mapreduce-client-jobclient/pom.xml | 1 + hadoop-project-dist/pom.xml | 2 - hadoop-project/pom.xml | 8 +- 22 files changed, 109 insertions(+), 561 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c delete mode 100644 
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h diff --git a/BUILDING.txt b/BUILDING.txt index d54ce83183..fa5127e017 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -75,7 +75,7 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop: Optional packages: -* Snappy compression +* Snappy compression (only used for hadoop-mapreduce-client-nativetask) $ sudo apt-get install snappy libsnappy-dev * Intel ISA-L library for erasure coding Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version @@ -161,7 +161,8 @@ Maven build goals: Snappy is a compression library that can be utilized by the native code. It is currently an optional component, meaning that Hadoop can be built with - or without this dependency. + or without this dependency. Snappy library as optional dependency is only + used for hadoop-mapreduce-client-nativetask. * Use -Drequire.snappy to fail the build if libsnappy.so is not found. 
If this option is not specified and the snappy library is missing, diff --git a/dev-support/bin/dist-copynativelibs b/dev-support/bin/dist-copynativelibs index ffc82b8fb1..7f2b6ad1f5 100755 --- a/dev-support/bin/dist-copynativelibs +++ b/dev-support/bin/dist-copynativelibs @@ -111,9 +111,6 @@ for i in "$@"; do --openssllibbundle=*) OPENSSLLIBBUNDLE=${i#*=} ;; - --snappybinbundle=*) - SNAPPYBINBUNDLE=${i#*=} - ;; --snappylib=*) SNAPPYLIB=${i#*=} ;; @@ -176,8 +173,6 @@ if [[ -d "${BIN_DIR}" ]] ; then exit 1 fi - bundle_native_bin "${SNAPPYBINBUNDLE}" "${SNAPPYLIBBUNDLE}" "snappy.lib" "snappy" "${SNAPPYLIB}" - bundle_native_bin "${ZSTDBINBUNDLE}" "${ZSTDLIBBUNDLE}" "zstd.lib" "zstd" "${ZSTDLIB}" bundle_native_bin "${OPENSSLBINBUNDLE}" "${OPENSSLLIBBUNDLE}" "openssl.lib" "crypto" "${OPENSSLLIB}" diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index c22222d8a3..6cab2db8ac 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -362,6 +362,11 @@ wildfly-openssl-java provided + + org.xerial.snappy + snappy-java + compile + @@ -641,10 +646,6 @@ false - - - - false @@ -698,11 +699,7 @@ ${project.build.directory}/native/javah ${sun.arch.data.model} ${require.bzip2} - ${require.snappy} ${require.zstd} - ${snappy.prefix} - ${snappy.lib} - ${snappy.include} ${zstd.prefix} ${zstd.lib} ${zstd.include} @@ -757,14 +754,9 @@ - - - false - false - true @@ -864,10 +856,6 @@ /nologo /p:Configuration=Release /p:OutDir=${project.build.directory}/bin/ - /p:CustomSnappyPrefix=${snappy.prefix} - /p:CustomSnappyLib=${snappy.lib} - /p:CustomSnappyInclude=${snappy.include} - /p:RequireSnappy=${require.snappy} /p:CustomZstdPrefix=${zstd.prefix} /p:CustomZstdLib=${zstd.lib} /p:CustomZstdInclude=${zstd.include} diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index 10591f6ce2..71c950b7f1 100644 --- 
a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -67,33 +67,6 @@ else() endif() set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -# Require snappy. -set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -hadoop_set_find_shared_library_version("1") -find_library(SNAPPY_LIBRARY - NAMES snappy - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/lib - ${CUSTOM_SNAPPY_PREFIX}/lib64 ${CUSTOM_SNAPPY_LIB}) -set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -find_path(SNAPPY_INCLUDE_DIR - NAMES snappy.h - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/include - ${CUSTOM_SNAPPY_INCLUDE}) -if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR) - get_filename_component(HADOOP_SNAPPY_LIBRARY ${SNAPPY_LIBRARY} NAME) - set(SNAPPY_SOURCE_FILES - "${SRC}/io/compress/snappy/SnappyCompressor.c" - "${SRC}/io/compress/snappy/SnappyDecompressor.c") - set(REQUIRE_SNAPPY ${REQUIRE_SNAPPY}) # Stop warning about unused variable. - message(STATUS "Found Snappy: ${SNAPPY_LIBRARY}") -else() - set(SNAPPY_INCLUDE_DIR "") - set(SNAPPY_SOURCE_FILES "") - if(REQUIRE_SNAPPY) - message(FATAL_ERROR "Required snappy library could not be found. 
SNAPPY_LIBRARY=${SNAPPY_LIBRARY}, SNAPPY_INCLUDE_DIR=${SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_INCLUDE_DIR=${CUSTOM_SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_PREFIX=${CUSTOM_SNAPPY_PREFIX}, CUSTOM_SNAPPY_INCLUDE=${CUSTOM_SNAPPY_INCLUDE}") - endif() -endif() - # Require zstandard SET(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) hadoop_set_find_shared_library_version("1") @@ -253,7 +226,6 @@ include_directories( ${JNI_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIR} - ${SNAPPY_INCLUDE_DIR} ${ISAL_INCLUDE_DIR} ${ZSTD_INCLUDE_DIR} ${OPENSSL_INCLUDE_DIR} @@ -269,7 +241,6 @@ hadoop_add_dual_library(hadoop ${SRC}/io/compress/lz4/lz4.c ${SRC}/io/compress/lz4/lz4hc.c ${ISAL_SOURCE_FILES} - ${SNAPPY_SOURCE_FILES} ${ZSTD_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} ${SRC}/io/compress/zlib/ZlibCompressor.c diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java index 3808003de2..e9558fab87 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java @@ -92,7 +92,7 @@ public interface Decompressor { * {@link #needsInput()} should be called in order to determine if more * input data is required. * - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of uncompressed data. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java index 686f30c9f8..77cf36a339 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java @@ -28,7 +28,6 @@ import org.apache.hadoop.io.compress.snappy.SnappyCompressor; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.util.NativeCodeLoader; /** * This class creates snappy compressors/decompressors. @@ -56,37 +55,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp return conf; } - /** - * Are the native snappy libraries loaded & initialized? 
- */ - public static void checkNativeCodeLoaded() { - if (!NativeCodeLoader.buildSupportsSnappy()) { - throw new RuntimeException("native snappy library not available: " + - "this version of libhadoop was built without " + - "snappy support."); - } - if (!NativeCodeLoader.isNativeCodeLoaded()) { - throw new RuntimeException("Failed to load libhadoop."); - } - if (!SnappyCompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyCompressor has not been loaded."); - } - if (!SnappyDecompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyDecompressor has not been loaded."); - } - } - - public static boolean isNativeCodeLoaded() { - return SnappyCompressor.isNativeCodeLoaded() && - SnappyDecompressor.isNativeCodeLoaded(); - } - - public static String getLibraryName() { - return SnappyCompressor.getLibraryName(); - } - /** * Create a {@link CompressionOutputStream} that will write to the given * {@link OutputStream}. 
@@ -115,7 +83,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -133,7 +100,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp */ @Override public Class getCompressorType() { - checkNativeCodeLoaded(); return SnappyCompressor.class; } @@ -144,7 +110,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp */ @Override public Compressor createCompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -179,7 +144,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { - checkNativeCodeLoaded(); return new BlockDecompressorStream(in, decompressor, conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT)); @@ -192,7 +156,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp */ @Override public Class getDecompressorType() { - checkNativeCodeLoaded(); return SnappyDecompressor.class; } @@ -203,7 +166,6 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp */ @Override public Decompressor createDecompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -215,7 
+177,7 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp */ @Override public DirectDecompressor createDirectDecompressor() { - return isNativeCodeLoaded() ? new SnappyDirectDecompressor() : null; + return new SnappyDirectDecompressor(); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 3d386800e4..2d514705d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -24,9 +24,9 @@ import java.nio.ByteBuffer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Compressor} based on the snappy compression algorithm. @@ -48,24 +48,6 @@ public class SnappyCompressor implements Compressor { private long bytesRead = 0L; private long bytesWritten = 0L; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyCompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. 
* @@ -225,7 +207,7 @@ public class SnappyCompressor implements Compressor { } // Compress data - n = compressBytesDirect(); + n = compressDirectBuf(); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // snappy consumes all buffer input @@ -291,9 +273,16 @@ public class SnappyCompressor implements Compressor { public void end() { } - private native static void initIDs(); - - private native int compressBytesDirect(); - - public native static String getLibraryName(); + private int compressDirectBuf() throws IOException { + if (uncompressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `uncompressedDirectBuf` for reading + uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0); + int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf, + (ByteBuffer) compressedDirectBuf); + uncompressedDirectBufLen = 0; + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index f31b76c347..d3775e286e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -24,9 +24,9 @@ import java.nio.ByteBuffer; import org.apache.hadoop.io.compress.Decompressor; import org.apache.hadoop.io.compress.DirectDecompressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Decompressor} based on the snappy compression algorithm. 
@@ -45,24 +45,6 @@ public class SnappyDecompressor implements Decompressor { private int userBufOff = 0, userBufLen = 0; private boolean finished; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyDecompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. * @@ -201,7 +183,7 @@ public class SnappyDecompressor implements Decompressor { * {@link #needsInput()} should be called in order to determine if more * input data is required. * - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of compressed data. @@ -232,7 +214,7 @@ public class SnappyDecompressor implements Decompressor { uncompressedDirectBuf.limit(directBufferSize); // Decompress data - n = decompressBytesDirect(); + n = decompressDirectBuf(); uncompressedDirectBuf.limit(n); if (userBufLen <= 0) { @@ -276,10 +258,20 @@ public class SnappyDecompressor implements Decompressor { // do nothing } - private native static void initIDs(); + private int decompressDirectBuf() throws IOException { + if (compressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `compressedDirectBuf` for reading + compressedDirectBuf.limit(compressedDirectBufLen).position(0); + int size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, + (ByteBuffer) uncompressedDirectBuf); + compressedDirectBufLen = 0; + compressedDirectBuf.clear(); + return size; + } + } - private native int decompressBytesDirect(); - int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { assert (this instanceof SnappyDirectDecompressor); @@ -298,7 +290,7 @@ public class 
SnappyDecompressor implements Decompressor { directBufferSize = dst.remaining(); int n = 0; try { - n = decompressBytesDirect(); + n = decompressDirectBuf(); presliced.position(presliced.position() + n); // SNAPPY always consumes the whole buffer or throws an exception src.position(src.limit()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java index a8a380ed07..11d1176f92 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java @@ -74,11 +74,6 @@ public final class NativeCodeLoader { return nativeCodeLoaded; } - /** - * Returns true only if this build was compiled with support for snappy. - */ - public static native boolean buildSupportsSnappy(); - /** * Returns true only if this build was compiled with support for ISA-L. 
*/ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index 2338824857..e40f01195b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -23,7 +23,6 @@ import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.bzip2.Bzip2Factory; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -67,7 +66,6 @@ public class NativeLibraryChecker { Configuration conf = new Configuration(); boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); boolean zlibLoaded = false; - boolean snappyLoaded = false; boolean isalLoaded = false; boolean zStdLoaded = false; boolean pmdkLoaded = false; @@ -80,7 +78,6 @@ public class NativeLibraryChecker { String openSslDetail = ""; String hadoopLibraryName = ""; String zlibLibraryName = ""; - String snappyLibraryName = ""; String isalDetail = ""; String pmdkDetail = ""; String zstdLibraryName = ""; @@ -99,11 +96,6 @@ public class NativeLibraryChecker { if (zStdLoaded && NativeCodeLoader.buildSupportsZstd()) { zstdLibraryName = ZStandardCodec.getLibraryName(); } - snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && - SnappyCodec.isNativeCodeLoaded(); - if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) { - snappyLibraryName = SnappyCodec.getLibraryName(); - } isalDetail = ErasureCodeNative.getLoadingFailureReason(); if (isalDetail != null) { @@ -152,7 +144,6 @@ public class NativeLibraryChecker { 
System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName); System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName); System.out.printf("zstd : %b %s%n", zStdLoaded, zstdLibraryName); - System.out.printf("snappy: %b %s%n", snappyLoaded, snappyLibraryName); System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); @@ -164,7 +155,7 @@ public class NativeLibraryChecker { } if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) || - (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded + (checkAll && !(zlibLoaded && lz4Loaded && bzip2Loaded && isalLoaded && zStdLoaded))) { // return 1 to indicated check failed ExitUtil.terminate(1); diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index ac3767b276..19b4d95e43 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. 
--> - + Release @@ -69,15 +69,6 @@ hadoop - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\lib - $(CustomSnappyPrefix)\bin - $(CustomSnappyLib) - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\include - $(CustomSnappyInclude) - true - $(SnappyInclude);$(IncludePath) $(ZLIB_HOME);$(IncludePath) @@ -87,11 +78,6 @@ $(CustomIsalLib) true - - - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - @@ -157,7 +137,6 @@ - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" /D HADOOP_ISAL_LIBRARY=L\"isa-l.dll\" @@ -181,7 +160,6 @@ - diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c deleted file mode 100644 index 9a09f078d8..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif // UNIX - -#ifdef WINDOWS -#include "winutils.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h" - -#define JINT_MAX 0x7fffffff - -static jfieldID SnappyCompressor_uncompressedDirectBuf; -static jfieldID SnappyCompressor_uncompressedDirectBufLen; -static jfieldID SnappyCompressor_compressedDirectBuf; -static jfieldID SnappyCompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_compress dlsym_snappy_compress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs -(JNIEnv *env, jclass clazz){ -#ifdef UNIX - // Load libsnappy.so - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char msg[1000]; - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - } -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_compress, dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - - SnappyCompressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_uncompressedDirectBufLen = 
(*env)->GetFieldID(env, clazz, - "uncompressedDirectBufLen", "I"); - SnappyCompressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes; - char* compressed_bytes; - snappy_status ret; - // Get members of SnappyCompressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_directBufferSize); - size_t buf_len; - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - /* size_t should always be 4 bytes or larger. */ - buf_len = (size_t)compressed_direct_buf_len; - ret = dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len, - compressed_bytes, &buf_len); - if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not compress data. 
Buffer length is too small."); - return 0; - } - if (buf_len > JINT_MAX) { - THROW(env, "java/lang/InternalError", "Invalid return buffer length."); - return 0; - } - - (*env)->SetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen, 0); - return (jint)buf_len; -} - -JNIEXPORT jstring JNICALL -Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_getLibraryName(JNIEnv *env, jclass class) { -#ifdef UNIX - if (dlsym_snappy_compress) { - Dl_info dl_info; - if(dladdr( - dlsym_snappy_compress, - &dl_info)) { - return (*env)->NewStringUTF(env, dl_info.dli_fname); - } - } - - return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY); -#endif - -#ifdef WINDOWS - LPWSTR filename = NULL; - GetLibraryName(dlsym_snappy_compress, &filename); - if (filename != NULL) { - return (*env)->NewString(env, filename, (jsize) wcslen(filename)); - } else { - return (*env)->NewStringUTF(env, "Unavailable"); - } -#endif -} -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c deleted file mode 100644 index 69ec101752..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h" - -static jfieldID SnappyDecompressor_compressedDirectBuf; -static jfieldID SnappyDecompressor_compressedDirectBufLen; -static jfieldID SnappyDecompressor_uncompressedDirectBuf; -static jfieldID SnappyDecompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_uncompress dlsym_snappy_uncompress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs -(JNIEnv *env, jclass clazz){ - - // Load libsnappy.so -#ifdef UNIX - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char* msg = (char*)malloc(1000); - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - } -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, 
env, libsnappy, "snappy_uncompress"); - -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_uncompress, dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); -#endif - - SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, - "compressedDirectBufLen", "I"); - SnappyDecompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* compressed_bytes = NULL; - char* uncompressed_bytes = NULL; - snappy_status ret; - // Get members of SnappyDecompressor - jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, SnappyDecompressor_compressedDirectBufLen); - jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_uncompressedDirectBuf); - size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyDecompressor_directBufferSize); - - // Get the input direct buffer - compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, - uncompressed_bytes, &uncompressed_direct_buf_len); - if (ret == SNAPPY_BUFFER_TOO_SMALL){ - THROW(env, "java/lang/InternalError", "Could not decompress data. 
Buffer length is too small."); - } else if (ret == SNAPPY_INVALID_INPUT){ - THROW(env, "java/lang/InternalError", "Could not decompress data. Input is invalid."); - } else if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not decompress data."); - } - - (*env)->SetIntField(env, thisj, SnappyDecompressor_compressedDirectBufLen, 0); - - return (jint)uncompressed_direct_buf_len; -} - -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h deleted file mode 100644 index 8394efe477..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#ifndef ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H -#define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H - -#include "org_apache_hadoop.h" - -#ifdef UNIX -#include -#endif - -#include -#include -#include - -#endif //ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm index e4f720cee8..1e62e94394 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm @@ -47,7 +47,7 @@ Components The native hadoop library includes various components: -* Compression Codecs (bzip2, lz4, snappy, zlib) +* Compression Codecs (bzip2, lz4, zlib) * Native IO utilities for [HDFS Short-Circuit Local Reads](../hadoop-hdfs/ShortCircuitLocalReads.html) and [Centralized Cache Management in HDFS](../hadoop-hdfs/CentralizedCacheManagement.html) * CRC32 checksum implementation @@ -117,7 +117,6 @@ NativeLibraryChecker is a tool to check whether native libraries are loaded corr Native library checking: hadoop: true /home/ozawa/hadoop/lib/native/libhadoop.so.1.0.0 zlib: true /lib/x86_64-linux-gnu/libz.so.1 - snappy: true /usr/lib/libsnappy.so.1 zstd: true /usr/lib/libzstd.so.1 lz4: true revision:99 bzip2: false diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 8be2dce06d..8082e3ab0b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -79,27 +79,6 @@ public class CompressDecompressTester CompressDecompressTester of( byte[] rawData) { return new 
CompressDecompressTester(rawData); @@ -432,7 +411,7 @@ public class CompressDecompressTester 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + } + } + outBuf.flip(); + while (outBuf.remaining() > 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + assertEquals(0, expected.remaining()); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java index 58874fdcdf..d3da6c1910 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java @@ -22,7 +22,6 @@ import static org.junit.Assert.*; import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; @@ -52,9 +51,6 @@ public class TestNativeCodeLoader { // library names are depended on platform and build envs // so just check names are available assertFalse(ZlibFactory.getLibraryName().isEmpty()); - if (NativeCodeLoader.buildSupportsSnappy()) { - assertFalse(SnappyCodec.getLibraryName().isEmpty()); - } if (NativeCodeLoader.buildSupportsOpenssl()) { assertFalse(OpensslCipher.getLibraryName().isEmpty()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index f0d8022847..5f6c838ece 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -183,6 +183,7 @@ 
src/test/java/org/apache/hadoop/cli/data60bytes src/test/resources/job_1329348432655_0001-10.jhist + **/jobTokenPassword diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 79a8964087..acb2855bbe 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -40,7 +40,6 @@ UNDEF false - false false false @@ -341,7 +340,6 @@ --openssllib=${openssl.lib} --opensslbinbundle=${bundle.openssl.in.bin} --openssllibbundle=${bundle.openssl} - --snappybinbundle=${bundle.snappy.in.bin} --snappylib=${snappy.lib} --snappylibbundle=${bundle.snappy} --zstdbinbundle=${bundle.zstd.in.bin} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index adfe8bd9f6..2af97b5b52 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -141,6 +141,7 @@ 3.2.4 3.10.6.Final 4.1.50.Final + 1.1.7.7 0.5.1 @@ -1710,6 +1711,11 @@ jna ${jna.version} + + org.xerial.snappy + snappy-java + ${snappy-java.version} + @@ -2193,7 +2199,6 @@ file:/dev/urandom - true true true @@ -2205,7 +2210,6 @@ - ${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${zstd.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib}