From 34aa6137bd890a565ace26be278a50f81b3dda20 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 18 Nov 2020 12:03:25 -0800 Subject: [PATCH] HADOOP-17292. Using lz4-java in Lz4Codec (#2350) Contributed by Liang-Chi Hsieh. --- hadoop-common-project/hadoop-common/pom.xml | 10 +- .../hadoop-common/src/CMakeLists.txt | 4 - .../apache/hadoop/io/compress/Lz4Codec.java | 39 - .../hadoop/io/compress/lz4/Lz4Compressor.java | 60 +- .../io/compress/lz4/Lz4Decompressor.java | 49 +- .../hadoop/util/NativeLibraryChecker.java | 12 +- .../hadoop/io/compress/lz4/Lz4Compressor.c | 128 -- .../hadoop/io/compress/lz4/Lz4Decompressor.c | 81 - .../org/apache/hadoop/io/compress/lz4/lz4hc.c | 1538 ----------------- .../org/apache/hadoop/io/compress/lz4/lz4hc.h | 438 ----- .../io/compress/CompressDecompressTester.java | 3 +- .../apache/hadoop/io/compress/TestCodec.java | 26 +- .../lz4/TestLz4CompressorDecompressor.java | 46 +- .../hadoop/util/TestNativeCodeLoader.java | 2 - .../src/test/resources/lz4/.sequencefile.crc | Bin 0 -> 164 bytes .../src/test/resources/lz4/sequencefile | Bin 0 -> 19967 bytes .../pom.xml | 9 +- .../src/main/native}/lz4/lz4.c | 0 .../src/main/native}/lz4/lz4.h | 0 hadoop-project/pom.xml | 6 + 20 files changed, 132 insertions(+), 2319 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h create mode 100644 hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc create mode 100644 hadoop-common-project/hadoop-common/src/test/resources/lz4/sequencefile rename {hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress => hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native}/lz4/lz4.c (100%) rename {hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress => hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native}/lz4/lz4.h (100%) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index cc786e8750..f692c8db39 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -371,6 +371,11 @@ snappy-java compile + + org.lz4 + lz4-java + provided + @@ -577,11 +582,6 @@ src/main/native/m4/* src/test/empty-file src/test/all-tests - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c - src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h src/main/native/gtest/**/* src/test/resources/test-untar.tgz src/test/resources/test.har/_SUCCESS diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index 71c950b7f1..d2ef03645a 100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -236,10 +236,6 @@ configure_file(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) hadoop_add_dual_library(hadoop main/native/src/exception.c - ${SRC}/io/compress/lz4/Lz4Compressor.c - ${SRC}/io/compress/lz4/Lz4Decompressor.c - ${SRC}/io/compress/lz4/lz4.c - ${SRC}/io/compress/lz4/lz4hc.c ${ISAL_SOURCE_FILES} ${ZSTD_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java index ba6b487150..8bfb7fe95c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java @@ -27,17 +27,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.util.NativeCodeLoader; /** * This class creates lz4 compressors/decompressors. */ public class Lz4Codec implements Configurable, CompressionCodec { - static { - NativeCodeLoader.isNativeCodeLoaded(); - } - Configuration conf; /** @@ -60,19 +55,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { return conf; } - /** - * Are the native lz4 libraries loaded & initialized? - * - * @return true if loaded & initialized, otherwise false - */ - public static boolean isNativeCodeLoaded() { - return NativeCodeLoader.isNativeCodeLoaded(); - } - - public static String getLibraryName() { - return Lz4Compressor.getLibraryName(); - } - /** * Create a {@link CompressionOutputStream} that will write to the given * {@link OutputStream}. @@ -101,9 +83,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); @@ -121,10 +100,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { */ @Override public Class getCompressorType() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return Lz4Compressor.class; } @@ -135,9 +110,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { */ @Override public Compressor createCompressor() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); @@ -175,10 +147,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return new BlockDecompressorStream(in, decompressor, conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT)); @@ -191,10 +159,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { */ @Override public Class getDecompressorType() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } - return Lz4Decompressor.class; } @@ -205,9 +169,6 @@ public class Lz4Codec implements Configurable, CompressionCodec { */ @Override public Decompressor createDecompressor() { - if (!isNativeCodeLoaded()) { - throw new RuntimeException("native lz4 library not available"); - } int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java index 5789ff6ca1..575644e00f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java @@ -22,9 +22,11 @@ import java.io.IOException; import java.nio.Buffer; import java.nio.ByteBuffer; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4Compressor; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,22 +51,7 @@ public class Lz4Compressor implements Compressor { private long bytesRead = 0L; private long bytesWritten = 0L; - private final boolean useLz4HC; - - static { - if (NativeCodeLoader.isNativeCodeLoaded()) { - // Initialize the native library - try { - initIDs(); - } catch (Throwable t) { - // Ignore failure to load/initialize lz4 - LOG.warn(t.toString()); - } - } else { - LOG.error("Cannot load " + Lz4Compressor.class.getName() + - " without native hadoop library!"); - } - } + private final LZ4Compressor lz4Compressor; /** * Creates a new compressor. @@ -74,9 +61,21 @@ public class Lz4Compressor implements Compressor { * which trades CPU for compression ratio. */ public Lz4Compressor(int directBufferSize, boolean useLz4HC) { - this.useLz4HC = useLz4HC; this.directBufferSize = directBufferSize; + try { + LZ4Factory lz4Factory = LZ4Factory.fastestInstance(); + if (useLz4HC) { + lz4Compressor = lz4Factory.highCompressor(); + } else { + lz4Compressor = lz4Factory.fastCompressor(); + } + } catch (AssertionError t) { + throw new RuntimeException("lz4-java library is not available: " + + "Lz4Compressor has not been loaded. You need to add " + + "lz4-java.jar to your CLASSPATH. " + t, t); + } + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); // Compression is guaranteed to succeed if 'dstCapacity' >= @@ -243,7 +242,7 @@ public class Lz4Compressor implements Compressor { } // Compress data - n = useLz4HC ? compressBytesDirectHC() : compressBytesDirect(); + n = compressDirectBuf(); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // lz4 consumes all buffer input @@ -309,11 +308,20 @@ public class Lz4Compressor implements Compressor { public synchronized void end() { } - private native static void initIDs(); - - private native int compressBytesDirect(); - - private native int compressBytesDirectHC(); - - public native static String getLibraryName(); + private int compressDirectBuf() { + if (uncompressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `uncompressedDirectBuf` for reading + uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0); + compressedDirectBuf.clear(); + lz4Compressor.compress((ByteBuffer) uncompressedDirectBuf, + (ByteBuffer) compressedDirectBuf); + uncompressedDirectBufLen = 0; + uncompressedDirectBuf.limit(directBufferSize).position(0); + int size = compressedDirectBuf.position(); + compressedDirectBuf.position(0); + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java index f26ae8481c..2b62ef78b2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.java @@ -22,8 +22,10 @@ import java.io.IOException; import java.nio.Buffer; import java.nio.ByteBuffer; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4SafeDecompressor; + import org.apache.hadoop.io.compress.Decompressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,20 +46,7 @@ public class Lz4Decompressor implements Decompressor { private int userBufOff = 0, userBufLen = 0; private boolean finished; - static { - if (NativeCodeLoader.isNativeCodeLoaded()) { - // Initialize the native library - try { - initIDs(); - } catch (Throwable t) { - // Ignore failure to load/initialize lz4 - LOG.warn(t.toString()); - } - } else { - LOG.error("Cannot load " + Lz4Compressor.class.getName() + - " without native hadoop library!"); - } - } + private LZ4SafeDecompressor lz4Decompressor; /** * Creates a new compressor. @@ -67,6 +56,15 @@ public class Lz4Decompressor implements Decompressor { public Lz4Decompressor(int directBufferSize) { this.directBufferSize = directBufferSize; + try { + LZ4Factory lz4Factory = LZ4Factory.fastestInstance(); + lz4Decompressor = lz4Factory.safeDecompressor(); + } catch (AssertionError t) { + throw new RuntimeException("lz4-java library is not available: " + + "Lz4Decompressor has not been loaded. You need to add " + + "lz4-java.jar to your CLASSPATH. " + t, t); + } + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); uncompressedDirectBuf.position(directBufferSize); @@ -200,7 +198,7 @@ public class Lz4Decompressor implements Decompressor { * @param b Buffer for the compressed data * @param off Start offset of the data * @param len Size of the buffer - * @return The actual number of bytes of compressed data. + * @return The actual number of bytes of uncompressed data. * @throws IOException */ @Override @@ -228,7 +226,7 @@ public class Lz4Decompressor implements Decompressor { uncompressedDirectBuf.limit(directBufferSize); // Decompress data - n = decompressBytesDirect(); + n = decompressDirectBuf(); uncompressedDirectBuf.limit(n); if (userBufLen <= 0) { @@ -272,7 +270,18 @@ public class Lz4Decompressor implements Decompressor { // do nothing } - private native static void initIDs(); - - private native int decompressBytesDirect(); + private int decompressDirectBuf() { + if (compressedDirectBufLen == 0) { + return 0; + } else { + compressedDirectBuf.limit(compressedDirectBufLen).position(0); + lz4Decompressor.decompress((ByteBuffer) compressedDirectBuf, + (ByteBuffer) uncompressedDirectBuf); + compressedDirectBufLen = 0; + compressedDirectBuf.clear(); + int size = uncompressedDirectBuf.position(); + uncompressedDirectBuf.position(0); + return size; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index e40f01195b..3847902e79 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -22,7 +22,6 @@ import org.apache.hadoop.io.compress.ZStandardCodec; import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; -import org.apache.hadoop.io.compress.Lz4Codec; import org.apache.hadoop.io.compress.bzip2.Bzip2Factory; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -69,8 +68,6 @@ public class NativeLibraryChecker { boolean isalLoaded = false; boolean zStdLoaded = false; boolean pmdkLoaded = false; - // lz4 is linked within libhadoop - boolean lz4Loaded = nativeHadoopLoaded; boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf); boolean openSslLoaded = false; boolean winutilsExists = false; @@ -81,7 +78,6 @@ public class NativeLibraryChecker { String isalDetail = ""; String pmdkDetail = ""; String zstdLibraryName = ""; - String lz4LibraryName = ""; String bzip2LibraryName = ""; String winutilsPath = null; @@ -119,9 +115,6 @@ public class NativeLibraryChecker { openSslLoaded = true; } - if (lz4Loaded) { - lz4LibraryName = Lz4Codec.getLibraryName(); - } if (bzip2Loaded) { bzip2LibraryName = Bzip2Factory.getLibraryName(conf); } @@ -144,7 +137,6 @@ public class NativeLibraryChecker { System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName); System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName); System.out.printf("zstd : %b %s%n", zStdLoaded, zstdLibraryName); - System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); System.out.printf("ISA-L: %b %s%n", isalLoaded, isalDetail); @@ -155,8 +147,8 @@ public class NativeLibraryChecker { } if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) || - (checkAll && !(zlibLoaded && lz4Loaded - && bzip2Loaded && isalLoaded && zStdLoaded))) { + (checkAll && !(zlibLoaded && bzip2Loaded + && isalLoaded && zStdLoaded))) { // return 1 to indicated check failed ExitUtil.terminate(1); } diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c deleted file mode 100644 index a54997e335..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "org_apache_hadoop.h" -#include "org_apache_hadoop_io_compress_lz4_Lz4Compressor.h" - -#ifdef UNIX -#include "config.h" -#endif // UNIX -#include "lz4.h" -#include "lz4hc.h" - - -static jfieldID Lz4Compressor_uncompressedDirectBuf; -static jfieldID Lz4Compressor_uncompressedDirectBufLen; -static jfieldID Lz4Compressor_compressedDirectBuf; -static jfieldID Lz4Compressor_dstCapacity; - - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_initIDs -(JNIEnv *env, jclass clazz){ - - Lz4Compressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Compressor_uncompressedDirectBufLen = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBufLen", "I"); - Lz4Compressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Compressor_dstCapacity = (*env)->GetFieldID(env, clazz, - "dstCapacity", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes; - char *compressed_bytes; - - // Get members of Lz4Compressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_dstCapacity); - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - compressed_direct_buf_len = LZ4_compress_default(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, compressed_direct_buf_len); - if (compressed_direct_buf_len < 0){ - THROW(env, "java/lang/InternalError", "LZ4_compress failed"); - } - - (*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0); - - return (jint)compressed_direct_buf_len; -} - -JNIEXPORT jstring JNICALL -Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_getLibraryName( - JNIEnv *env, jclass class - ) { - char version_buf[128]; - snprintf(version_buf, sizeof(version_buf), "revision:%d", LZ4_versionNumber()); - return (*env)->NewStringUTF(env, version_buf); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirectHC -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes = NULL; - char* compressed_bytes = NULL; - - // Get members of Lz4Compressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_dstCapacity); - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - compressed_direct_buf_len = LZ4_compress_HC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, compressed_direct_buf_len, 0); - if (compressed_direct_buf_len < 0){ - THROW(env, "java/lang/InternalError", "LZ4_compressHC failed"); - } - - (*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0); - - return (jint)compressed_direct_buf_len; -} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c deleted file mode 100644 index cdeaa315d1..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "org_apache_hadoop.h" -#include "org_apache_hadoop_io_compress_lz4_Lz4Decompressor.h" - -#ifdef UNIX -#include "config.h" -#endif // UNIX -#include "lz4.h" - - -static jfieldID Lz4Decompressor_compressedDirectBuf; -static jfieldID Lz4Decompressor_compressedDirectBufLen; -static jfieldID Lz4Decompressor_uncompressedDirectBuf; -static jfieldID Lz4Decompressor_directBufferSize; - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_initIDs -(JNIEnv *env, jclass clazz){ - - Lz4Decompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Decompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, - "compressedDirectBufLen", "I"); - Lz4Decompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - Lz4Decompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_decompressBytesDirect -(JNIEnv *env, jobject thisj){ - const char *compressed_bytes; - char *uncompressed_bytes; - - // Get members of Lz4Decompressor - jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, Lz4Decompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, Lz4Decompressor_compressedDirectBufLen); - jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, Lz4Decompressor_uncompressedDirectBuf); - size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Decompressor_directBufferSize); - - // Get the input direct buffer - compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - uncompressed_direct_buf_len = LZ4_decompress_safe(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len); - if (uncompressed_direct_buf_len < 0) { - THROW(env, "java/lang/InternalError", "LZ4_uncompress_unknownOutputSize failed."); - } - - (*env)->SetIntField(env, thisj, Lz4Decompressor_compressedDirectBufLen, 0); - - return (jint)uncompressed_direct_buf_len; -} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c deleted file mode 100644 index 5922ed7b16..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c +++ /dev/null @@ -1,1538 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Copyright (C) 2011-2017, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/lz4/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ - - -/* ************************************* -* Tuning Parameter -***************************************/ - -/*! HEAPMODE : - * Select how default compression function will allocate workplace memory, - * in stack (0:fastest), or in heap (1:requires malloc()). - * Since workplace is rather large, heap mode is recommended. - */ -#ifndef LZ4HC_HEAPMODE -# define LZ4HC_HEAPMODE 1 -#endif - - -/*=== Dependency ===*/ -#define LZ4_HC_STATIC_LINKING_ONLY -#include "lz4hc.h" - - -/*=== Common LZ4 definitions ===*/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - -/*=== Enums ===*/ -typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; - - -#define LZ4_COMMONDEFS_ONLY -#ifndef LZ4_SRC_INCLUDED -#include "lz4.c" /* LZ4_count, constants, mem */ -#endif - -/*=== Constants ===*/ -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_OPT_NUM (1<<12) - - -/*=== Macros ===*/ -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) -#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) -#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ -#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ -/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ -#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor - -static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } - - -/************************************** -* HC Compression -**************************************/ -static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) -{ - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); -} - -static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) -{ - uptrval startingOffset = (uptrval)(hc4->end - hc4->base); - if (startingOffset > 1 GB) { - LZ4HC_clearTables(hc4); - startingOffset = 0; - } - startingOffset += 64 KB; - hc4->nextToUpdate = (U32) startingOffset; - hc4->base = start - startingOffset; - hc4->end = start; - hc4->dictBase = start - startingOffset; - hc4->dictLimit = (U32) startingOffset; - hc4->lowLimit = (U32) startingOffset; -} - - -/* Update chains up to ip (excluded) */ -LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) -{ - U16* const chainTable = hc4->chainTable; - U32* const hashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - U32 const target = (U32)(ip - base); - U32 idx = hc4->nextToUpdate; - - while (idx < target) { - U32 const h = LZ4HC_hashPtr(base+idx); - size_t delta = idx - hashTable[h]; - if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; - DELTANEXTU16(chainTable, idx) = (U16)delta; - hashTable[h] = idx; - idx++; - } - - hc4->nextToUpdate = target; -} - -/** LZ4HC_countBack() : - * @return : negative value, nb of common bytes before ip/match */ -LZ4_FORCE_INLINE -int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, - const BYTE* const iMin, const BYTE* const mMin) -{ - int back = 0; - int const min = (int)MAX(iMin - ip, mMin - match); - assert(min <= 0); - assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); - assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); - while ( (back > min) - && (ip[back-1] == match[back-1]) ) - back--; - return back; -} - -#if defined(_MSC_VER) -# define LZ4HC_rotl32(x,r) _rotl(x,r) -#else -# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif - - -static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) -{ - size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; - if (bitsToRotate == 0) - return pattern; - return LZ4HC_rotl32(pattern, (int)bitsToRotate); -} - -/* LZ4HC_countPattern() : - * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ -static unsigned -LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) -{ - const BYTE* const iStart = ip; - reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32; - - while (likely(ip < iEnd-(sizeof(pattern)-1))) { - reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; - if (!diff) { ip+=sizeof(pattern); continue; } - ip += LZ4_NbCommonBytes(diff); - return (unsigned)(ip - iStart); - } - - if (LZ4_isLittleEndian()) { - reg_t patternByte = pattern; - while ((ip>= 8; - } - } else { /* big endian */ - U32 bitOffset = (sizeof(pattern)*8) - 8; - while (ip < iEnd) { - BYTE const byte = (BYTE)(pattern >> bitOffset); - if (*ip != byte) break; - ip ++; bitOffset -= 8; - } - } - - return (unsigned)(ip - iStart); -} - -/* LZ4HC_reverseCountPattern() : - * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) - * read using natural platform endianess */ -static unsigned -LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) -{ - const BYTE* const iStart = ip; - - while (likely(ip >= iLow+4)) { - if (LZ4_read32(ip-4) != pattern) break; - ip -= 4; - } - { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ - while (likely(ip>iLow)) { - if (ip[-1] != *bytePtr) break; - ip--; bytePtr--; - } } - return (unsigned)(iStart - ip); -} - -/* LZ4HC_protectDictEnd() : - * Checks if the match is in the last 3 bytes of the dictionary, so reading the - * 4 byte MINMATCH would overflow. - * @returns true if the match index is okay. - */ -static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) -{ - return ((U32)((dictLimit - 1) - matchIndex) >= 3); -} - -typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; -typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; - -LZ4_FORCE_INLINE int -LZ4HC_InsertAndGetWiderMatch ( - LZ4HC_CCtx_internal* hc4, - const BYTE* const ip, - const BYTE* const iLowLimit, - const BYTE* const iHighLimit, - int longest, - const BYTE** matchpos, - const BYTE** startpos, - const int maxNbAttempts, - const int patternAnalysis, - const int chainSwap, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ - U16* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx; - const BYTE* const base = hc4->base; - const U32 dictLimit = hc4->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 ipIndex = (U32)(ip - base); - const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; - const BYTE* const dictBase = hc4->dictBase; - int const lookBackLength = (int)(ip-iLowLimit); - int nbAttempts = maxNbAttempts; - U32 matchChainPos = 0; - U32 const pattern = LZ4_read32(ip); - U32 matchIndex; - repeat_state_e repeat = rep_untested; - size_t srcPatternLength = 0; - - DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); - /* First Match */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", - matchIndex, lowestMatchIndex); - - while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - int matchLength=0; - nbAttempts--; - assert(matchIndex < ipIndex); - if (favorDecSpeed && (ipIndex - matchIndex < 8)) { - /* do nothing */ - } else if (matchIndex >= dictLimit) { /* within current Prefix */ - const BYTE* const matchPtr = base + matchIndex; - assert(matchPtr >= lowPrefixPtr); - assert(matchPtr < ip); - assert(longest >= 1); - if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { - if (LZ4_read32(matchPtr) == pattern) { - int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; - matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - *matchpos = matchPtr + back; - *startpos = ip + back; - } } } - } else { /* lowestMatchIndex <= matchIndex < dictLimit */ - const BYTE* const matchPtr = dictBase + matchIndex; - if (LZ4_read32(matchPtr) == pattern) { - const BYTE* const dictStart = dictBase + hc4->lowLimit; - int back = 0; - const BYTE* vLimit = ip + (dictLimit - matchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) - matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit); - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ - *startpos = ip + back; - } } } - - if (chainSwap && matchLength==longest) { /* better match => select a better chain */ - assert(lookBackLength==0); /* search forward only */ - if (matchIndex + (U32)longest <= ipIndex) { - int const kTrigger = 4; - U32 distanceToNextMatch = 1; - int const end = longest - MINMATCH + 1; - int step = 1; - int accel = 1 << kTrigger; - int pos; - for (pos = 0; pos < end; pos += step) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); - step = (accel++ >> kTrigger); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - matchChainPos = (U32)pos; - accel = 1 << kTrigger; - } - } - if (distanceToNextMatch > 1) { - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch; - continue; - } } } - - { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); - if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { - U32 const matchCandidateIdx = matchIndex-1; - /* may be a repeated pattern */ - if (repeat == rep_untested) { - if ( ((pattern & 0xFFFF) == (pattern >> 16)) - & ((pattern & 0xFF) == (pattern >> 24)) ) { - repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); - } else { - repeat = rep_not; - } } - if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) - && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) { - const int extDict = matchCandidateIdx < dictLimit; - const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx; - if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ - const BYTE* const dictStart = dictBase + hc4->lowLimit; - const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit; - size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); - if (extDict && matchPtr + forwardPatternLength == iLimit) { - U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); - forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern); - } - { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr; - size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); - size_t currentSegmentLength; - if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) { - U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); - backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern); - } - /* Limit backLength not go further than lowestMatchIndex */ - backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); - assert(matchCandidateIdx - backLength >= lowestMatchIndex); - currentSegmentLength = backLength + forwardPatternLength; - /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ - if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ - && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ - if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) - matchIndex = newMatchIndex; - else { - /* Can only happen if started in the prefix */ - assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); - matchIndex = dictLimit; - } - } else { - U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ - if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) { - assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); - matchIndex = dictLimit; - } else { - matchIndex = newMatchIndex; - if (lookBackLength==0) { /* no back possible */ - size_t const maxML = MIN(currentSegmentLength, srcPatternLength); - if ((size_t)longest < maxML) { - assert(base + matchIndex < ip); - if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break; - assert(maxML < 2 GB); - longest = (int)maxML; - *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ - *startpos = ip; - } - { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); - if (distToNextPattern > matchIndex) break; /* avoid overflow */ - matchIndex -= distToNextPattern; - } } } } } - continue; - } } - } } /* PA optimization */ - - /* follow current chain */ - matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); - - } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - - if ( dict == usingDictCtxHc - && nbAttempts - && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { - size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base); - U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; - assert(dictEndOffset <= 1 GB); - matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; - while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { - const BYTE* const matchPtr = dictCtx->base + dictMatchIndex; - - if (LZ4_read32(matchPtr) == pattern) { - int mlt; - int back = 0; - const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; - mlt -= back; - if (mlt > longest) { - longest = mlt; - *matchpos = base + matchIndex + back; - *startpos = ip + back; - } } - - { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); - dictMatchIndex -= nextOffset; - matchIndex -= nextOffset; - } } } - - return longest; -} - -LZ4_FORCE_INLINE -int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts, - const int patternAnalysis, - const dictCtx_directive dict) -{ - const BYTE* uselessPtr = ip; - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), - * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); -} - -/* LZ4HC_encodeSequence() : - * @return : 0 if ok, - * 1 if buffer issue detected */ -LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** ip, - BYTE** op, - const BYTE** anchor, - int matchLength, - const BYTE* const match, - limitedOutput_directive limit, - BYTE* oend) -{ - size_t length; - BYTE* const token = (*op)++; - -#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) - static const BYTE* start = NULL; - static U32 totalCost = 0; - U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); - U32 const ll = (U32)(*ip - *anchor); - U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; - U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; - U32 const cost = 1 + llAdd + ll + 2 + mlAdd; - if (start==NULL) start = *anchor; /* only works for single segment */ - /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ - DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", - pos, - (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), - cost, totalCost); - totalCost += cost; -#endif - - /* Encode Literal length */ - length = (size_t)(*ip - *anchor); - if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (length >= RUN_MASK) { - size_t len = length - RUN_MASK; - *token = (RUN_MASK << ML_BITS); - for(; len >= 255 ; len -= 255) *(*op)++ = 255; - *(*op)++ = (BYTE)len; - } else { - *token = (BYTE)(length << ML_BITS); - } - - /* Copy Literals */ - LZ4_wildCopy8(*op, *anchor, (*op) + length); - *op += length; - - /* Encode Offset */ - assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ - LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; - - /* Encode MatchLength */ - assert(matchLength >= MINMATCH); - length = (size_t)matchLength - MINMATCH; - if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ - if (length >= ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } - if (length >= 255) { length -= 255; *(*op)++ = 255; } - *(*op)++ = (BYTE)length; - } else { - *token += (BYTE)(length); - } - - /* Prepare next loop */ - *ip += matchLength; - *anchor = *ip; - - return 0; -} - -LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( - LZ4HC_CCtx_internal* const ctx, - const char* const source, - char* const dest, - int* srcSizePtr, - int const maxOutputSize, - unsigned maxNbAttempts, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) -{ - const int inputSize = *srcSizePtr; - const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ - - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* optr = (BYTE*) dest; - BYTE* op = (BYTE*) dest; - BYTE* oend = op + maxOutputSize; - - int ml0, ml, ml2, ml3; - const BYTE* start0; - const BYTE* ref0; - const BYTE* ref = NULL; - const BYTE* start2 = NULL; - const BYTE* ref2 = NULL; - const BYTE* start3 = NULL; - const BYTE* ref3 = NULL; - - /* init */ - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ - - /* Main Loop */ - while (ip <= mflimit) { - ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); - if (ml encode ML1 */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - continue; - } - - if (start0 < ip) { /* first match was skipped at least once */ - if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ - ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ - } } - - /* Here, start0==ip */ - if ((start2 - ip) < 3) { /* First Match too small : removed */ - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - /* At this stage, we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) */ - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - - if (start2 + ml2 <= mflimit) { - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, - start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, - maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); - } else { - ml3 = ml2; - } - - if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ - /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - ip = start2; - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow; - continue; - } - - if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ - if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - if (start2 < ip+ml) { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - /* - * OK, now we have 3 ascending matches; - * let's write the first one ML1. - * ip & ref are known; Now decide ml. - */ - if (start2 < ip+ml) { - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } else { - ml = (int)(start2 - ip); - } - } - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; - - /* ML2 becomes ML1 */ - ip = start2; ref = ref2; ml = ml2; - - /* ML3 becomes ML2 */ - start2 = start3; ref2 = ref3; ml2 = ml3; - - /* let's find a new ML3 */ - goto _Search3; - } - -_last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + litLength + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; /* Check output limit */ - /* adapt lastRunSize to fill 'dest' */ - lastRunSize = (size_t)(oend - op) - 1; - litLength = (lastRunSize + 255 - RUN_MASK) / 255; - lastRunSize -= litLength; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } - - /* End */ - *srcSizePtr = (int) (((const char*)ip) - source); - return (int) (((char*)op)-dest); - -_dest_overflow: - if (limit == fillOutput) { - op = optr; /* restore correct out pointer */ - goto _last_literals; - } - return 0; -} - - -static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, - const char* const source, char* dst, - int* srcSizePtr, int dstCapacity, - int const nbSearches, size_t sufficient_len, - const limitedOutput_directive limit, int const fullUpdate, - const dictCtx_directive dict, - HCfavor_e favorDecSpeed); - - -LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) -{ - typedef enum { lz4hc, lz4opt } lz4hc_strat_e; - typedef struct { - lz4hc_strat_e strat; - U32 nbSearches; - U32 targetLength; - } cParams_t; - static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { - { lz4hc, 2, 16 }, /* 0, unused */ - { lz4hc, 2, 16 }, /* 1, unused */ - { lz4hc, 2, 16 }, /* 2, unused */ - { lz4hc, 4, 16 }, /* 3 */ - { lz4hc, 8, 16 }, /* 4 */ - { lz4hc, 16, 16 }, /* 5 */ - { lz4hc, 32, 16 }, /* 6 */ - { lz4hc, 64, 16 }, /* 7 */ - { lz4hc, 128, 16 }, /* 8 */ - { lz4hc, 256, 16 }, /* 9 */ - { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ - { lz4opt, 512,128 }, /*11 */ - { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ - }; - - DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr); - - if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ - - ctx->end += *srcSizePtr; - if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ - cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); - { cParams_t const cParam = clTable[cLevel]; - HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; - int result; - - if (cParam.strat == lz4hc) { - result = LZ4HC_compress_hashChain(ctx, - src, dst, srcSizePtr, dstCapacity, - cParam.nbSearches, limit, dict); - } else { - assert(cParam.strat == lz4opt); - result = LZ4HC_compress_optimal(ctx, - src, dst, srcSizePtr, dstCapacity, - (int)cParam.nbSearches, cParam.targetLength, limit, - cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ - dict, favor); - } - if (result <= 0) ctx->dirty = 1; - return result; - } -} - -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); - -static int -LZ4HC_compress_generic_noDictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - assert(ctx->dictCtx == NULL); - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); -} - -static int -LZ4HC_compress_generic_dictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit; - assert(ctx->dictCtx != NULL); - if (position >= 64 KB) { - ctx->dictCtx = NULL; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else if (position == 0 && *srcSizePtr > 4 KB) { - memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); - LZ4HC_setExternalDict(ctx, (const BYTE *)src); - ctx->compressionLevel = (short)cLevel; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); - } -} - -static int -LZ4HC_compress_generic ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) -{ - if (ctx->dictCtx == NULL) { - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } -} - - -int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } - -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. */ -static size_t LZ4_streamHC_t_alignment(void) -{ - struct { char c; LZ4_streamHC_t t; } t_a; - return sizeof(t_a) - sizeof(t_a.t); -} -#endif - -/* state is presumed correctly initialized, - * in which case its size and alignment have already been validate */ -int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ - LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. */ - assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */ -#endif - if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ - LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); - LZ4HC_init_internal (ctx, (const BYTE*)src); - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); - else - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); -} - -int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); -} - -int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) -{ -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); -#else - LZ4_streamHC_t state; - LZ4_streamHC_t* const statePtr = &state; -#endif - int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - FREEMEM(statePtr); -#endif - return cSize; -} - -/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ -int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) -{ - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); - LZ4_setCompressionLevel(ctx, cLevel); - return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); -} - - - -/************************************** -* Streaming Functions -**************************************/ -/* allocation */ -LZ4_streamHC_t* LZ4_createStreamHC(void) -{ - LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); - if (LZ4_streamHCPtr==NULL) return NULL; - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); /* full initialization, malloc'ed buffer can be full of garbage */ - return LZ4_streamHCPtr; -} - -int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) -{ - DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); - if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ - FREEMEM(LZ4_streamHCPtr); - return 0; -} - - -LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) -{ - LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; - if (buffer == NULL) return NULL; - if (size < sizeof(LZ4_streamHC_t)) return NULL; -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. */ - if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */ -#endif - /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ - LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE); - DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size); - /* end-base will trigger a clearTable on starting compression */ - LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1; - LZ4_streamHCPtr->internal_donotuse.base = NULL; - LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; - LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0; - LZ4_streamHCPtr->internal_donotuse.dirty = 0; - LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); - return LZ4_streamHCPtr; -} - -/* just a stub */ -void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); -} - -void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (LZ4_streamHCPtr->internal_donotuse.dirty) { - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - } else { - /* preserve end - base : can trigger clearTable's threshold */ - LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base; - LZ4_streamHCPtr->internal_donotuse.base = NULL; - LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; - } - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); -} - -void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) -{ - DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; - if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; - LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; -} - -void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) -{ - LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); -} - -/* LZ4_loadDictHC() : - * LZ4_streamHCPtr is presumed properly initialized */ -int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* dictionary, int dictSize) -{ - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize); - assert(LZ4_streamHCPtr != NULL); - if (dictSize > 64 KB) { - dictionary += (size_t)dictSize - 64 KB; - dictSize = 64 KB; - } - /* need a full initialization, there are bad side-effects when using resetFast() */ - { int const cLevel = ctxPtr->compressionLevel; - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); - } - LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); - ctxPtr->end = (const BYTE*)dictionary + dictSize; - if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); - return dictSize; -} - -void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { - working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL; -} - -/* compression */ - -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) -{ - DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); - if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4) - LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ - - /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); - ctxPtr->dictBase = ctxPtr->base; - ctxPtr->base = newBlock - ctxPtr->dictLimit; - ctxPtr->end = newBlock; - ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ - - /* cannot reference an extDict and a dictCtx at the same time */ - ctxPtr->dictCtx = NULL; -} - -static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int dstCapacity, - limitedOutput_directive limit) -{ - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)", - LZ4_streamHCPtr, src, *srcSizePtr); - assert(ctxPtr != NULL); - /* auto-init if forgotten */ - if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); - - /* Check overflow */ - if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) { - size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit; - if (dictSize > 64 KB) dictSize = 64 KB; - LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); - } - - /* Check if blocks follow each other */ - if ((const BYTE*)src != ctxPtr->end) - LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); - - /* Check overlapping input/dictionary space */ - { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; - const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; - const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; - if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { - if (sourceEnd > dictEnd) sourceEnd = dictEnd; - ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); - if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; - } - } - - return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); -} - -int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) -{ - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); - else - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); -} - -int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) -{ - return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); -} - - - -/* dictionary saving */ - -int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) -{ - LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; - int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); - DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); - if (dictSize > 64 KB) dictSize = 64 KB; - if (dictSize < 4) dictSize = 0; - if (dictSize > prefixSize) dictSize = prefixSize; - memmove(safeBuffer, streamPtr->end - dictSize, dictSize); - { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); - streamPtr->end = (const BYTE*)safeBuffer + dictSize; - streamPtr->base = streamPtr->end - endIndex; - streamPtr->dictLimit = endIndex - (U32)dictSize; - streamPtr->lowLimit = endIndex - (U32)dictSize; - if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit; - } - return dictSize; -} - - -/*************************************************** -* Deprecated Functions -***************************************************/ - -/* These functions currently generate deprecation warnings */ - -/* Wrappers for deprecated compression functions */ -int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } -int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } - - -/* Deprecated streaming functions */ -int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } - -/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) - * @return : 0 on success, !=0 if error */ -int LZ4_resetStreamStateHC(void* state, char* inputBuffer) -{ - LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); - if (hc4 == NULL) return 1; /* init failed */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return 0; -} - -void* LZ4_createHC (const char* inputBuffer) -{ - LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); - if (hc4 == NULL) return NULL; /* not enough memory */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return hc4; -} - -int LZ4_freeHC (void* LZ4HC_Data) -{ - if (!LZ4HC_Data) return 0; /* support free on NULL */ - FREEMEM(LZ4HC_Data); - return 0; -} - -int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) -{ - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); -} - -int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) -{ - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); -} - -char* LZ4_slideInputBufferHC(void* LZ4HC_Data) -{ - LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data; - const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit; - LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); - /* avoid const char * -> char * conversion warning :( */ - return (char *)(uptrval)bufferStart; -} - - -/* ================================================ - * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) - * ===============================================*/ -typedef struct { - int price; - int off; - int mlen; - int litlen; -} LZ4HC_optimal_t; - -/* price in bytes */ -LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) -{ - int price = litlen; - assert(litlen >= 0); - if (litlen >= (int)RUN_MASK) - price += 1 + ((litlen-(int)RUN_MASK) / 255); - return price; -} - - -/* requires mlen >= MINMATCH */ -LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) -{ - int price = 1 + 2 ; /* token + 16-bit offset */ - assert(litlen >= 0); - assert(mlen >= MINMATCH); - - price += LZ4HC_literalsPrice(litlen); - - if (mlen >= (int)(ML_MASK+MINMATCH)) - price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); - - return price; -} - - -typedef struct { - int off; - int len; -} LZ4HC_match_t; - -LZ4_FORCE_INLINE LZ4HC_match_t -LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, - const BYTE* ip, const BYTE* const iHighLimit, - int minLen, int nbSearches, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ - LZ4HC_match_t match = { 0 , 0 }; - const BYTE* matchPtr = NULL; - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), - * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); - if (matchLength <= minLen) return match; - if (favorDecSpeed) { - if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ - } - match.len = matchLength; - match.off = (int)(ip-matchPtr); - return match; -} - - -static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, - const char* const source, - char* dst, - int* srcSizePtr, - int dstCapacity, - int const nbSearches, - size_t sufficient_len, - const limitedOutput_directive limit, - int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) -{ -#define TRAILING_LITERALS 3 - LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ - - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - BYTE* op = (BYTE*) dst; - BYTE* opSaved = (BYTE*) dst; - BYTE* oend = op + dstCapacity; - - /* init */ - DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; - - /* Main Loop */ - assert(ip - anchor < LZ4_MAX_INPUT_SIZE); - while (ip <= mflimit) { - int const llen = (int)(ip - anchor); - int best_mlen, best_off; - int cur, last_match_pos = 0; - - LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - if (firstMatch.len==0) { ip++; continue; } - - if ((size_t)firstMatch.len > sufficient_len) { - /* good enough solution : immediate encoding */ - int const firstML = firstMatch.len; - const BYTE* const matchPos = ip - firstMatch.off; - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ - goto _dest_overflow; - continue; - } - - /* set prices for first positions (literals) */ - { int rPos; - for (rPos = 0 ; rPos < MINMATCH ; rPos++) { - int const cost = LZ4HC_literalsPrice(llen + rPos); - opt[rPos].mlen = 1; - opt[rPos].off = 0; - opt[rPos].litlen = llen + rPos; - opt[rPos].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - rPos, cost, opt[rPos].litlen); - } } - /* set prices using initial match */ - { int mlen = MINMATCH; - int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - int const offset = firstMatch.off; - assert(matchML < LZ4_OPT_NUM); - for ( ; mlen <= matchML ; mlen++) { - int const cost = LZ4HC_sequencePrice(llen, mlen); - opt[mlen].mlen = mlen; - opt[mlen].off = offset; - opt[mlen].litlen = llen; - opt[mlen].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", - mlen, cost, mlen); - } } - last_match_pos = firstMatch.len; - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } - - /* check further positions */ - for (cur = 1; cur < last_match_pos; cur++) { - const BYTE* const curPtr = ip + cur; - LZ4HC_match_t newMatch; - - if (curPtr > mflimit) break; - DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", - cur, opt[cur].price, opt[cur+1].price, cur+1); - if (fullUpdate) { - /* not useful to search here if next position has same (or lower) cost */ - if ( (opt[cur+1].price <= opt[cur].price) - /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ - && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) - continue; - } else { - /* not useful to search here if next position has same (or lower) cost */ - if (opt[cur+1].price <= opt[cur].price) continue; - } - - DEBUGLOG(7, "search at rPos:%u", cur); - if (fullUpdate) - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - else - /* only test matches of minimum length; slightly faster, but misses a few bytes */ - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); - if (!newMatch.len) continue; - - if ( ((size_t)newMatch.len > sufficient_len) - || (newMatch.len + cur >= LZ4_OPT_NUM) ) { - /* immediate encoding */ - best_mlen = newMatch.len; - best_off = newMatch.off; - last_match_pos = cur + 1; - goto encode; - } - - /* before match : set price with literals at beginning */ - { int const baseLitlen = opt[cur].litlen; - int litlen; - for (litlen = 1; litlen < MINMATCH; litlen++) { - int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); - int const pos = cur + litlen; - if (price < opt[pos].price) { - opt[pos].mlen = 1; /* literal */ - opt[pos].off = 0; - opt[pos].litlen = baseLitlen+litlen; - opt[pos].price = price; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", - pos, price, opt[pos].litlen); - } } } - - /* set prices using match at position = cur */ - { int const matchML = newMatch.len; - int ml = MINMATCH; - - assert(cur + newMatch.len < LZ4_OPT_NUM); - for ( ; ml <= matchML ; ml++) { - int const pos = cur + ml; - int const offset = newMatch.off; - int price; - int ll; - DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", - pos, last_match_pos); - if (opt[cur].mlen == 1) { - ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) - + LZ4HC_sequencePrice(ll, ml); - } else { - ll = 0; - price = opt[cur].price + LZ4HC_sequencePrice(0, ml); - } - - assert((U32)favorDecSpeed <= 1); - if (pos > last_match_pos+TRAILING_LITERALS - || price <= opt[pos].price - (int)favorDecSpeed) { - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", - pos, price, ml); - assert(pos < LZ4_OPT_NUM); - if ( (ml == matchML) /* last pos of last match */ - && (last_match_pos < pos) ) - last_match_pos = pos; - opt[pos].mlen = ml; - opt[pos].off = offset; - opt[pos].litlen = ll; - opt[pos].price = price; - } } } - /* complete following positions with literals */ - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } - } /* for (cur = 1; cur <= last_match_pos; cur++) */ - - assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); - best_mlen = opt[last_match_pos].mlen; - best_off = opt[last_match_pos].off; - cur = last_match_pos - best_mlen; - - encode: /* cur, last_match_pos, best_mlen, best_off must be set */ - assert(cur < LZ4_OPT_NUM); - assert(last_match_pos >= 1); /* == 1 when only one candidate */ - DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); - { int candidate_pos = cur; - int selected_matchLength = best_mlen; - int selected_offset = best_off; - while (1) { /* from end to beginning */ - int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ - int const next_offset = opt[candidate_pos].off; - DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); - opt[candidate_pos].mlen = selected_matchLength; - opt[candidate_pos].off = selected_offset; - selected_matchLength = next_matchLength; - selected_offset = next_offset; - if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ - assert(next_matchLength > 0); /* can be 1, means literal */ - candidate_pos -= next_matchLength; - } } - - /* encode all recorded sequences in order */ - { int rPos = 0; /* relative position (to ip) */ - while (rPos < last_match_pos) { - int const ml = opt[rPos].mlen; - int const offset = opt[rPos].off; - if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ - rPos += ml; - assert(ml >= MINMATCH); - assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ - goto _dest_overflow; - } } - } /* while (ip <= mflimit) */ - - _last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + litLength + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; /* Check output limit */ - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (size_t)(oend - op) - 1; - litLength = (lastRunSize + 255 - RUN_MASK) / 255; - lastRunSize -= litLength; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } - - /* End */ - *srcSizePtr = (int) (((const char*)ip) - source); - return (int) ((char*)op-dst); - - _dest_overflow: - if (limit == fillOutput) { - op = opSaved; /* restore correct out pointer */ - goto _last_literals; - } - return 0; - } diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h deleted file mode 100644 index 44e35bbf6b..0000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h +++ /dev/null @@ -1,438 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Header File - Copyright (C) 2011-2017, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/lz4/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#ifndef LZ4_HC_H_19834876238432 -#define LZ4_HC_H_19834876238432 - -#if defined (__cplusplus) -extern "C" { -#endif - -/* --- Dependency --- */ -/* note : lz4hc requires lz4.h/lz4.c for compilation */ -#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ - - -/* --- Useful constants --- */ -#define LZ4HC_CLEVEL_MIN 3 -#define LZ4HC_CLEVEL_DEFAULT 9 -#define LZ4HC_CLEVEL_OPT_MIN 10 -#define LZ4HC_CLEVEL_MAX 12 - - -/*-************************************ - * Block Compression - **************************************/ -/*! LZ4_compress_HC() : - * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm. - * `dst` must be already allocated. - * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") - * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") - * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. - * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. - * @return : the number of bytes written into 'dst' - * or 0 if compression fails. - */ -LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); - - -/* Note : - * Decompression functions are provided within "lz4.h" (BSD license) - */ - - -/*! LZ4_compress_HC_extStateHC() : - * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. - * `state` size is provided by LZ4_sizeofStateHC(). - * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). - */ -LZ4LIB_API int LZ4_sizeofStateHC(void); -LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); - - -/*! LZ4_compress_HC_destSize() : v1.9.0+ - * Will compress as much data as possible from `src` - * to fit into `targetDstSize` budget. - * Result is provided in 2 parts : - * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) - * or 0 if compression fails. - * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` - */ -LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize, - int compressionLevel); - - -/*-************************************ - * Streaming Compression - * Bufferless synchronous API - **************************************/ - typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ - -/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : - * These functions create and release memory for LZ4 HC streaming state. - * Newly created states are automatically initialized. - * A same state can be used multiple times consecutively, - * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. - */ -LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); -LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); - -/* - These functions compress data in successive blocks of any size, - using previous blocks as dictionary, to improve compression ratio. - One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. - There is an exception for ring buffers, which can be smaller than 64 KB. - Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue(). - - Before starting compression, state must be allocated and properly initialized. - LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT. - - Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream) - or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental). - LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once, - which is automatically the case when state is created using LZ4_createStreamHC(). - - After reset, a first "fictional block" can be designated as initial dictionary, - using LZ4_loadDictHC() (Optional). - - Invoke LZ4_compress_HC_continue() to compress each successive block. - The number of blocks is unlimited. - Previous input blocks, including initial dictionary when present, - must remain accessible and unmodified during compression. - - It's allowed to update compression level anytime between blocks, - using LZ4_setCompressionLevel() (experimental). - - 'dst' buffer should be sized to handle worst case scenarios - (see LZ4_compressBound(), it ensures compression success). - In case of failure, the API does not guarantee recovery, - so the state _must_ be reset. - To ensure compression success - whenever `dst` buffer size cannot be made >= LZ4_compressBound(), - consider using LZ4_compress_HC_continue_destSize(). - - Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks, - it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC(). - Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB) - - After completing a streaming compression, - it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state, - just by resetting it, using LZ4_resetStreamHC_fast(). -*/ - -LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ -LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); - -LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, - const char* src, char* dst, - int srcSize, int maxDstSize); - -/*! LZ4_compress_HC_continue_destSize() : v1.9.0+ - * Similar to LZ4_compress_HC_continue(), - * but will read as much data as possible from `src` - * to fit into `targetDstSize` budget. - * Result is provided into 2 parts : - * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) - * or 0 if compression fails. - * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. - * Note that this function may not consume the entire input. - */ -LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize); - -LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); - - - -/*^********************************************** - * !!!!!! STATIC LINKING ONLY !!!!!! - ***********************************************/ - -/*-****************************************************************** - * PRIVATE DEFINITIONS : - * Do not use these definitions directly. - * They are merely exposed to allow static allocation of `LZ4_streamHC_t`. - * Declare an `LZ4_streamHC_t` directly, rather than any type below. - * Even then, only do so in the context of static linking, as definitions may change between versions. - ********************************************************************/ - -#define LZ4HC_DICTIONARY_LOGSIZE 16 -#define LZ4HC_MAXD (1<= 199901L) /* C99 */) -#include - -typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal; -struct LZ4HC_CCtx_internal -{ - uint32_t hashTable[LZ4HC_HASHTABLESIZE]; - uint16_t chainTable[LZ4HC_MAXD]; - const uint8_t* end; /* next block here to continue on current prefix */ - const uint8_t* base; /* All index relative to this position */ - const uint8_t* dictBase; /* alternate base for extDict */ - uint32_t dictLimit; /* below that point, need extDict */ - uint32_t lowLimit; /* below that point, no more dict */ - uint32_t nextToUpdate; /* index from which to continue dictionary update */ - short compressionLevel; - int8_t favorDecSpeed; /* favor decompression speed if this flag set, - otherwise, favor compression ratio */ - int8_t dirty; /* stream has to be fully reset if this flag is set */ - const LZ4HC_CCtx_internal* dictCtx; -}; - -#else - -typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal; -struct LZ4HC_CCtx_internal -{ - unsigned int hashTable[LZ4HC_HASHTABLESIZE]; - unsigned short chainTable[LZ4HC_MAXD]; - const unsigned char* end; /* next block here to continue on current prefix */ - const unsigned char* base; /* All index relative to this position */ - const unsigned char* dictBase; /* alternate base for extDict */ - unsigned int dictLimit; /* below that point, need extDict */ - unsigned int lowLimit; /* below that point, no more dict */ - unsigned int nextToUpdate; /* index from which to continue dictionary update */ - short compressionLevel; - char favorDecSpeed; /* favor decompression speed if this flag set, - otherwise, favor compression ratio */ - char dirty; /* stream has to be fully reset if this flag is set */ - const LZ4HC_CCtx_internal* dictCtx; -}; - -#endif - - -/* Do not use these definitions directly ! - * Declare or allocate an LZ4_streamHC_t instead. - */ -#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ ) /* 262200 or 262256*/ -#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t)) -union LZ4_streamHC_u { - size_t table[LZ4_STREAMHCSIZE_SIZET]; - LZ4HC_CCtx_internal internal_donotuse; -}; /* previously typedef'd to LZ4_streamHC_t */ - -/* LZ4_streamHC_t : - * This structure allows static allocation of LZ4 HC streaming state. - * This can be used to allocate statically, on state, or as part of a larger structure. - * - * Such state **must** be initialized using LZ4_initStreamHC() before first use. - * - * Note that invoking LZ4_initStreamHC() is not required when - * the state was created using LZ4_createStreamHC() (which is recommended). - * Using the normal builder, a newly created state is automatically initialized. - * - * Static allocation shall only be used in combination with static linking. - */ - -/* LZ4_initStreamHC() : v1.9.0+ - * Required before first use of a statically allocated LZ4_streamHC_t. - * Before v1.9.0 : use LZ4_resetStreamHC() instead - */ -LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size); - - -/*-************************************ -* Deprecated Functions -**************************************/ -/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */ - -/* deprecated compression functions */ -LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize); - -/* Obsolete streaming functions; degraded functionality; do not use! - * - * In order to perform streaming compression, these functions depended on data - * that is no longer tracked in the state. They have been preserved as well as - * possible: using them will still produce a correct output. However, use of - * LZ4_slideInputBufferHC() will truncate the history of the stream, rather - * than preserve a window-sized chunk of history. - */ -LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer); -LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data); -LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data); -LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); -LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void); -LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer); - - -/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC(). - * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(), - * which is now the recommended function to start a new stream of blocks, - * but cannot be used to initialize a memory segment containing arbitrary garbage data. - * - * It is recommended to switch to LZ4_initStreamHC(). - * LZ4_resetStreamHC() will generate deprecation warnings in a future version. - */ -LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel); - - -#if defined (__cplusplus) -} -#endif - -#endif /* LZ4_HC_H_19834876238432 */ - - -/*-************************************************** - * !!!!! STATIC LINKING ONLY !!!!! - * Following definitions are considered experimental. - * They should not be linked from DLL, - * as there is no guarantee of API stability yet. - * Prototypes will be promoted to "stable" status - * after successfull usage in real-life scenarios. - ***************************************************/ -#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */ -#ifndef LZ4_HC_SLO_098092834 -#define LZ4_HC_SLO_098092834 - -#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */ -#include "lz4.h" - -#if defined (__cplusplus) -extern "C" { -#endif - -/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental) - * It's possible to change compression level - * between successive invocations of LZ4_compress_HC_continue*() - * for dynamic adaptation. - */ -LZ4LIB_STATIC_API void LZ4_setCompressionLevel( - LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); - -/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental) - * Opt. Parser will favor decompression speed over compression ratio. - * Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN. - */ -LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( - LZ4_streamHC_t* LZ4_streamHCPtr, int favor); - -/*! LZ4_resetStreamHC_fast() : v1.9.0+ - * When an LZ4_streamHC_t is known to be in a internally coherent state, - * it can often be prepared for a new compression with almost no work, only - * sometimes falling back to the full, expensive reset that is always required - * when the stream is in an indeterminate state (i.e., the reset performed by - * LZ4_resetStreamHC()). - * - * LZ4_streamHCs are guaranteed to be in a valid state when: - * - returned from LZ4_createStreamHC() - * - reset by LZ4_resetStreamHC() - * - memset(stream, 0, sizeof(LZ4_streamHC_t)) - * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast() - * - the stream was in a valid state and was then used in any compression call - * that returned success - * - the stream was in an indeterminate state and was used in a compression - * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that - * returned success - * - * Note: - * A stream that was last used in a compression call that returned an error - * may be passed to this function. However, it will be fully reset, which will - * clear any existing history and settings from the context. - */ -LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( - LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); - -/*! LZ4_compress_HC_extStateHC_fastReset() : - * A variant of LZ4_compress_HC_extStateHC(). - * - * Using this variant avoids an expensive initialization step. It is only safe - * to call if the state buffer is known to be correctly initialized already - * (see above comment on LZ4_resetStreamHC_fast() for a definition of - * "correctly initialized"). From a high level, the difference is that this - * function initializes the provided state with a call to - * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a - * call to LZ4_resetStreamHC(). - */ -LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( - void* state, - const char* src, char* dst, - int srcSize, int dstCapacity, - int compressionLevel); - -/*! LZ4_attach_HC_dictionary() : - * This is an experimental API that allows for the efficient use of a - * static dictionary many times. - * - * Rather than re-loading the dictionary buffer into a working context before - * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a - * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, - * in which the working stream references the dictionary stream in-place. - * - * Several assumptions are made about the state of the dictionary stream. - * Currently, only streams which have been prepared by LZ4_loadDictHC() should - * be expected to work. - * - * Alternatively, the provided dictionary stream pointer may be NULL, in which - * case any existing dictionary stream is unset. - * - * A dictionary should only be attached to a stream without any history (i.e., - * a stream that has just been reset). - * - * The dictionary will remain attached to the working stream only for the - * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the - * dictionary context association from the working stream. The dictionary - * stream (and source buffer) must remain in-place / accessible / unchanged - * through the lifetime of the stream session. - */ -LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( - LZ4_streamHC_t *working_stream, - const LZ4_streamHC_t *dictionary_stream); - -#if defined (__cplusplus) -} -#endif - -#endif /* LZ4_HC_SLO_098092834 */ -#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index ec42e4625e..c016ff0378 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -473,8 +473,7 @@ public class CompressDecompressTester boolean isAvailable(TesterPair pair) { Compressor compressor = pair.compressor; - if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) - && (NativeCodeLoader.isNativeCodeLoaded())) + if (compressor.getClass().isAssignableFrom(Lz4Compressor.class)) return true; else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java index 462225cebf..1cc5f8b867 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java @@ -140,22 +140,16 @@ public class TestCodec { @Test public void testLz4Codec() throws IOException { - if (NativeCodeLoader.isNativeCodeLoaded()) { - if (Lz4Codec.isNativeCodeLoaded()) { - conf.setBoolean( - CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, - false); - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); - conf.setBoolean( - CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, - true); - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); - } else { - Assert.fail("Native hadoop library available but lz4 not"); - } - } + conf.setBoolean( + CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, + false); + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); + conf.setBoolean( + CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, + true); + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec"); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java index 6f3b076097..8be5ec3d3f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java @@ -27,17 +27,20 @@ import java.io.DataOutputStream; import java.io.IOException; import java.util.Random; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.compress.BlockCompressorStream; import org.apache.hadoop.io.compress.BlockDecompressorStream; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.Lz4Codec; import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; import org.apache.hadoop.test.MultithreadedTestUtil; -import org.junit.Before; import org.junit.Test; import static org.junit.Assume.*; @@ -45,12 +48,7 @@ public class TestLz4CompressorDecompressor { private static final Random rnd = new Random(12345l); - @Before - public void before() { - assumeTrue(Lz4Codec.isNativeCodeLoaded()); - } - - //test on NullPointerException in {@code compressor.setInput()} + //test on NullPointerException in {@code compressor.setInput()} @Test public void testCompressorSetInputNullPointerException() { try { @@ -330,4 +328,36 @@ public class TestLz4CompressorDecompressor { ctx.waitFor(60000); } + + @Test + public void testLz4Compatibility() throws Exception { + // The sequence file was created using native Lz4 codec before HADOOP-17292. + // After we use lz4-java for lz4 compression, this test makes sure we can + // decompress the sequence file correctly. + Path filePath = new Path(TestLz4CompressorDecompressor.class + .getResource("/lz4/sequencefile").toURI()); + + Configuration conf = new Configuration(); + conf.setInt("io.seqfile.compress.blocksize", 1000); + FileSystem fs = FileSystem.get(conf); + + int lines = 2000; + + SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf); + + Writable key = (Writable)reader.getKeyClass().newInstance(); + Writable value = (Writable)reader.getValueClass().newInstance(); + + int lc = 0; + try { + while (reader.next(key, value)) { + assertEquals("key" + lc, key.toString()); + assertEquals("value" + lc, value.toString()); + lc++; + } + } finally { + reader.close(); + } + assertEquals(lines, lc); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java index d3da6c1910..98b75bba47 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java @@ -21,7 +21,6 @@ import org.junit.Test; import static org.junit.Assert.*; import org.apache.hadoop.crypto.OpensslCipher; -import org.apache.hadoop.io.compress.Lz4Codec; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; @@ -54,7 +53,6 @@ public class TestNativeCodeLoader { if (NativeCodeLoader.buildSupportsOpenssl()) { assertFalse(OpensslCipher.getLibraryName().isEmpty()); } - assertFalse(Lz4Codec.getLibraryName().isEmpty()); LOG.info("TestNativeCodeLoader: libhadoop.so is loaded."); } } diff --git a/hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc b/hadoop-common-project/hadoop-common/src/test/resources/lz4/.sequencefile.crc new file mode 100644 index 0000000000000000000000000000000000000000..b36bc54a7c59979f5d7bec757f692b855b4ea9f1 GIT binary patch literal 164 zcmV;V09*fKa$^7h00ICItTy}!wGpG>6v4Q|(OJ>Yxj76=rzgdnoL565MQ>b7SxrxF zRZ>2dmi0I5E{TP9!x)4|nH0Q=3}i*hh&?%1D=z0VbRoc-m{>2KsO=#~+;V=VTMVX_ zY=j0U)^Pwq z4hRwnBo6BlAaNZjAT9_-;sO#v^CwuG_!AHyP)I})ffT;4x@%lhQ`Pl)y-pbEb@z0= zL_2Rj{e7=q)%bJIerDzJ(cLc}+_-b&=Iz6S+c$0<9o;$j%F)5+55N75_{d;zE%;yE zJo@^byN55nc<||$s!trZtn1re2>;=6H$9%Yw zZRM~6R>CTnfYmSwQ!ClvNenZsgdfK$_;F0Yk7G6bI3}OWz9)0~@9E`PPlEee-a*nr-K>0#?E*n1IzV2~(@t6}|dgb~PWaW#@BP0V`n@ zOu%ZGgsHXcvFjLSS_wanRq*4OfFH+d_;E~L&&D!eH9?X^2-#;o2wb0mZ4$1lI|%jT z*H`IvJopeo{9D-e4fBOW@Y~cY*%jq*N<6BFY1~iPkF{Pg z*r=4|Pb<}fsDaDXopp-ZI=zku9|4>aQM>d?8}|rj+j=d#svIB*JQXpG`w7dnUi))a z<-_56b|Hrquo70m1gwThm|D-4z5o}d74YL&3BQO9*>1(8Lf~8ESPegpN%(P0!H;7aehJ&c__!Dj>BASp{6UOB+citu zR!iIZPi%zsdbU?F84&o+I99`tV-kKGQ}E-MhF`+YV|H*mMTfmm?HZktV zA6o=p-olrX9gfV9Pef)G?pN^T7zcRraeyDyrTOf273OQR=i`U}~UbR<}7^GxC&j>GHYdD%hJ_${s zanhA5<&b~aCFh6Dv!kLW(8@xFLHlqE+64<*?d2>3mkbCSf#!=2M_kA!Aucpd0&}$- z@(+9D+$X*67Wpp+d!_e4Op3aJ4k7CS-Mpri1!`XdS{VS%Es_a3(j$(}kk1LU#?_sL zYajU7d&Yw>Ql^3|^)QT@uW1OF2n-T-=2k?3?DP~Sbg)GUK=HZd1NGQ(@2_79YopoC85Wz?Q2pR=3$roLP`_7 zXW>qr0slwK{Mul*PCW)FB}Y|GsMS>wM$OkW1PrJpQYm4`uI_8n8s=e-{6bn2yk|jA zoqIe8lPYhZL%@1KhenZsZrwmvSCKxs(Kz7{Ntza`^+UQ6y60)j!sOPDCvxtrvBpig zk{*Ql!&nY0qD>A(m^Ku+CWD@BGbkbjl44m@I4F{^#R~qAxPhgHM1xnYwF8}nrLos zgBAb@uhZ5SMf$=#?2_Lc;4bX`wS_xz9+hTwK$Vfzfm!NoR|gO-bU8vzG=Gjjpq;kJ zC=wawVUPUgPr$OqTJ`UTQfla)O{1yv z2_A!yHp>J`GnWYvBq&-}=Cn&x1X@=o!B>>n`wpdF zDeWy2m=>C*7Mj1ufm&*sBxrWnrK{5xnT6#jn^FC97whhr&v*NNB+Lqd7LgSKzcNFl zz?hRcP5TUy!d-86KN7j+(bZ||%UpY`E9G_p?~X-apNMv80I>>z_7rWMc?x!p z*3&Ot#vTQ`b!HvYfIq?pW*VS{T^{h;XKq`ACZVKdq&X>VvEwJunArUGRjpT1v14O{ zvst2R_fC$@5)m7tmt+24xq1*ItfybPynz^P(Cc`BeFtY0Vzk^&0T88jeZX&;`SSu2 zGZG?SOk(d(ATqH@Ew+EGzchH3qO_4+%wYwrgjFyBt6>tRHnQbgaEaR#&75Yx#xM`o zW~Q!1h_GP;Z0$5~-TN0}HpR`qzTC!`cDNMGFO9Q}ihMZRsK|%2jf#9Y+o;F~TdAy# zW!Zc{X|iDx2k^7g0Cex);Ae<9{Nv}dDC&)DzhHhbCB7JrRq*4OfFH+d_;F0aueGTx z?MIZslotl&zbzl~hc%QHf7{_J}{$Zb-yJTZ-tblLA zriE@lEW#4XV_|w`GSkqLmx&FXhNcQKMD(QO;}Ob+m5<;#u<>uKgn2x2WfpsUW**3l zMFx?8ym!dHL8t;kgiK00IFdm=9LXRbM>aaIeU$Njs)Pka1=n%Hpm;Kqt9FO40YZ1= zB?86DMnST{JRsSfu56f%$UQRSxXKMOMbk_7kqO8Qg3^=(h7BE%7h5|JGms1;8wJS* z^MJgE|FWanm>Ww-#!ON;36gw3`LxPgdJvWq%S=&q?c)^=O+-&jMov&|1m(h3BLUe6 z?ji0-l4ISJ#D#XAqEIzsWd@M|GJ&enAS4`w2$`6SybH3ikQGTDDn+R9tsO0K3twM5tOlgcr=St`w+E(>t|%ii z50h%x6u$Uf0W%)^v~_mrVs+m%$3ynP1!1FM<+j+gW*Wd-5HVleuc6CfQ&f_Q|i0 zY$OvTB~XX%X=!U=goRI!Co^>!s)ea-=l@`vWX8&cJ^S(ydmobt|9(I&W{J@qVrk)&a>aN9T|iK zaCFqa=V=~}kZfd25DrP|(RtXX(MPEnu6`_BTa=yiw6cXTg2G4VjTW81H|WU9Eri{k zou8>lW2}bw+7Ns1>O$8wLo-YdkDDPC@SUh(Qvj=BumHaIcFW9bmKnqX%-H(Z?b`W^ z#-uV3fJ1Xzg`zPXc`h?Q6yFE#JB<4$R@APw%A zBM%E%&>gev2F_Dq3tz;74~kd#DnW&BsBIT2e7mH<9XoBA(3A+72keuYM|zw$g|E)J zg)f4F!Z*~m3l+YBNy%JwVaFL&hSiIZM7Z~iTxd##%)>tUxpYr+TlgX@e0n^Y(fEzp z+&@5Mx-_9}GbA%sF6`NrhOTQ@8oI6naxqJU?pQ1dI`_xen@w#zErYG!#ljbnfXw)v z-_W)hk{K%(_UuYS*R?ARUDpA*n598?jFA$j*mm8t@eqp;C{Ivt{0W%ZPqozUr}_bI z`~~*irh3sbQTQ5g=wB2fP*U=ds4m7@HD8`KKLBk0HmnLRwR@a?gvx3cf7{Epp1@z% ZU4xoF8|`1AFE8~RCnmyEt!zNG{vY#8DpUXf literal 0 HcmV?d00001 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml index f42110cfb2..6fd5787c23 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml @@ -71,6 +71,11 @@ assertj-core test + + org.lz4 + lz4-java + test + @@ -156,9 +161,9 @@ - - diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.c similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.c diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.h similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.h diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index b44444eba7..3132281617 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -144,6 +144,7 @@ 3.10.6.Final 4.1.50.Final 1.1.8.1 + 1.7.1 0.5.1 @@ -1792,6 +1793,11 @@ snappy-java ${snappy-java.version} + + org.lz4 + lz4-java + ${lz4-java.version} +