diff --git a/BUILDING.txt b/BUILDING.txt
index efd93d1b51..c7be7a32c9 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -74,6 +74,8 @@ Optional packages:
 * Snappy compression
   $ sudo apt-get install snappy libsnappy-dev
+* Intel ISA-L library for erasure coding
+  Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
 * Bzip2
   $ sudo apt-get install bzip2 libbz2-dev
 * Jansson (C Library for JSON)
@@ -179,6 +181,29 @@ Maven build goals:
  * -Dtest.exclude=
  * -Dtest.exclude.pattern=**/.java,**/.java
 
+ Intel ISA-L build options:
+
+ Intel ISA-L is an erasure coding library that can be utilized by the native code.
+ It is currently an optional component, meaning that Hadoop can be built with
+ or without this dependency. Note that the library is loaded dynamically at
+ runtime. Please refer to the official site for details about the library:
+ https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
+
+ * Use -Drequire.isal to fail the build if libisal.so is not found.
+   If this option is not specified and the ISA-L library is missing,
+   we silently build a version of libhadoop.so that cannot make use of ISA-L and
+   the native raw erasure coders.
+   This option is recommended if you plan on making use of native raw erasure
+   coders and want to get more repeatable builds.
+ * Use -Disal.prefix to specify a nonstandard location for the libisal
+   library files. You do not need this option if you have installed ISA-L to the
+   system library path.
+ * Use -Disal.lib to specify a nonstandard location for the libisal library
+   files.
+ * Use -Dbundle.isal to copy the contents of the isal.lib directory into
+   the final tar file. This option requires that -Disal.lib is also given,
+   and it ignores the -Disal.prefix option.
+
 ----------------------------------------------------------------------------------
 Building components separately
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index dd70947408..85df389575 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -948,6 +948,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12040. Adjust inputs order for the decode API in raw erasure coder.
     (Kai Zheng via yliu)
 
+    HADOOP-11887. Introduce Intel ISA-L erasure coding library for native
+    erasure encoding support (Kai Zheng via Colin P. McCabe)
+
   OPTIMIZATIONS
 
     HADOOP-11785.
Reduce the number of listStatus operation in distcp diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 4735c6bc9d..502bbbfdcb 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -567,6 +567,9 @@ + false + + false true false @@ -620,6 +623,7 @@ org.apache.hadoop.io.compress.snappy.SnappyDecompressor org.apache.hadoop.io.compress.lz4.Lz4Compressor org.apache.hadoop.io.compress.lz4.Lz4Decompressor + org.apache.hadoop.io.erasurecode.ErasureCodeNative org.apache.hadoop.crypto.OpensslCipher org.apache.hadoop.crypto.random.OpensslSecureRandom org.apache.hadoop.util.NativeCrc32 @@ -642,7 +646,7 @@ - + @@ -664,7 +668,13 @@ - + + + + + + + @@ -684,6 +694,9 @@ + false + + false true @@ -737,6 +750,7 @@ org.apache.hadoop.io.compress.snappy.SnappyDecompressor org.apache.hadoop.io.compress.lz4.Lz4Compressor org.apache.hadoop.io.compress.lz4.Lz4Decompressor + org.apache.hadoop.io.erasurecode.ErasureCodeNative org.apache.hadoop.crypto.OpensslCipher org.apache.hadoop.crypto.random.OpensslSecureRandom org.apache.hadoop.util.NativeCrc32 @@ -790,6 +804,9 @@ /p:CustomOpensslLib=${openssl.lib} /p:CustomOpensslInclude=${openssl.include} /p:RequireOpenssl=${require.openssl} + /p:RequireIsal=${require.isal} + /p:CustomIsalPrefix=${isal.prefix} + /p:CustomIsalLib=${isal.lib} diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index c93bfe7854..63bb773fb2 100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -94,6 +94,29 @@ else() endif() endif() +set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +hadoop_set_find_shared_library_version("2") +find_library(ISAL_LIBRARY + NAMES isal + PATHS ${CUSTOM_ISAL_PREFIX} ${CUSTOM_ISAL_PREFIX}/lib + ${CUSTOM_ISAL_PREFIX}/lib64 ${CUSTOM_ISAL_LIB}) +set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) +if (ISAL_LIBRARY) + GET_FILENAME_COMPONENT(HADOOP_ISAL_LIBRARY ${ISAL_LIBRARY} NAME) + set(ISAL_INCLUDE_DIR ${SRC}/io/erasurecode/include) + set(ISAL_SOURCE_FILES + ${SRC}/io/erasurecode/erasure_code.c) + add_executable(erasure_code_test + ${SRC}/io/erasurecode/erasure_code.c + ${TST}/io/erasurecode/erasure_code_test.c + ) + target_link_libraries(erasure_code_test ${CMAKE_DL_LIBS}) +else (ISAL_LIBRARY) + IF(REQUIRE_ISAL) + MESSAGE(FATAL_ERROR "Required ISA-L library could not be found. ISAL_LIBRARY=${ISAL_LIBRARY}, CUSTOM_ISAL_PREFIX=${CUSTOM_ISAL_PREFIX}") + ENDIF(REQUIRE_ISAL) +endif (ISAL_LIBRARY) + # Build hardware CRC32 acceleration, if supported on the platform. 
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") set(BULK_CRC_ARCH_SOURCE_FIlE "${SRC}/util/bulk_crc32_x86.c") @@ -169,6 +192,7 @@ include_directories( ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIR} ${SNAPPY_INCLUDE_DIR} + ${ISAL_INCLUDE_DIR} ${OPENSSL_INCLUDE_DIR} ${SRC}/util ) @@ -181,6 +205,7 @@ hadoop_add_dual_library(hadoop ${SRC}/io/compress/lz4/Lz4Decompressor.c ${SRC}/io/compress/lz4/lz4.c ${SRC}/io/compress/lz4/lz4hc.c + ${ISAL_SOURCE_FILES} ${SNAPPY_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} ${SRC}/io/compress/zlib/ZlibCompressor.c diff --git a/hadoop-common-project/hadoop-common/src/config.h.cmake b/hadoop-common-project/hadoop-common/src/config.h.cmake index d71271dd3e..445cc33e4d 100644 --- a/hadoop-common-project/hadoop-common/src/config.h.cmake +++ b/hadoop-common-project/hadoop-common/src/config.h.cmake @@ -22,6 +22,7 @@ #cmakedefine HADOOP_BZIP2_LIBRARY "@HADOOP_BZIP2_LIBRARY@" #cmakedefine HADOOP_SNAPPY_LIBRARY "@HADOOP_SNAPPY_LIBRARY@" #cmakedefine HADOOP_OPENSSL_LIBRARY "@HADOOP_OPENSSL_LIBRARY@" +#cmakedefine HADOOP_ISAL_LIBRARY "@HADOOP_ISAL_LIBRARY@" #cmakedefine HAVE_SYNC_FILE_RANGE #cmakedefine HAVE_POSIX_FADVISE diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java new file mode 100644 index 0000000000..1c98f3c227 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.erasurecode; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * Erasure code native libraries (for now, Intel ISA-L) related utilities. + */ +public final class ErasureCodeNative { + + private static final Log LOG = + LogFactory.getLog(ErasureCodeNative.class.getName()); + + /** + * The reason why ISA-L library is not available, or null if it is available. 
+   */
+  private static final String LOADING_FAILURE_REASON;
+
+  static {
+    if (!NativeCodeLoader.isNativeCodeLoaded()) {
+      LOADING_FAILURE_REASON = "hadoop native library cannot be loaded.";
+    } else if (!NativeCodeLoader.buildSupportsIsal()) {
+      LOADING_FAILURE_REASON = "libhadoop was built without ISA-L support";
+    } else {
+      String problem = null;
+      try {
+        loadLibrary();
+      } catch (Throwable t) {
+        problem = "Loading ISA-L failed: " + t.getMessage();
+        LOG.error("Loading ISA-L failed", t);
+      }
+      LOADING_FAILURE_REASON = problem;
+    }
+  }
+
+  private ErasureCodeNative() {}
+
+  /**
+   * Are native libraries loaded?
+   */
+  public static boolean isNativeCodeLoaded() {
+    return LOADING_FAILURE_REASON == null;
+  }
+
+  /**
+   * Is the native ISA-L library loaded and initialized? Throws a
+   * RuntimeException if not.
+   */
+  public static void checkNativeCodeLoaded() {
+    if (LOADING_FAILURE_REASON != null) {
+      throw new RuntimeException(LOADING_FAILURE_REASON);
+    }
+  }
+
+  /**
+   * Load the native library, if it is available and supported.
+   */
+  public static native void loadLibrary();
+
+  /**
+   * Get the name of the loaded native library, if available.
+   */
+  public static native String getLibraryName();
+
+  public static String getLoadingFailureReason() {
+    return LOADING_FAILURE_REASON;
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java
index 79d4c0c327..dd04a19f48 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java
@@ -31,7 +31,7 @@
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-public class NativeCodeLoader {
+public final class NativeCodeLoader {
 
   private static final Log LOG =
     LogFactory.getLog(NativeCodeLoader.class);
@@ -62,6 +62,8 @@ public class NativeCodeLoader {
     }
   }
 
+  private NativeCodeLoader() {}
+
   /**
    * Check if native-hadoop code is loaded for this platform.
    *
@@ -76,7 +78,12 @@ public static boolean isNativeCodeLoaded() {
    * Returns true only if this build was compiled with support for snappy.
    */
   public static native boolean buildSupportsSnappy();
-
+
+  /**
+   * Returns true only if this build was compiled with support for ISA-L.
+   */
+  public static native boolean buildSupportsIsal();
+
   /**
    * Returns true only if this build was compiled with support for openssl.
*/ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index d8c68992a5..c31f85dda7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -18,6 +18,7 @@ package org.apache.hadoop.util; +import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; @@ -65,6 +66,7 @@ public static void main(String[] args) { boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); boolean zlibLoaded = false; boolean snappyLoaded = false; + boolean isalLoaded = false; // lz4 is linked within libhadoop boolean lz4Loaded = nativeHadoopLoaded; boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf); @@ -75,6 +77,7 @@ public static void main(String[] args) { String hadoopLibraryName = ""; String zlibLibraryName = ""; String snappyLibraryName = ""; + String isalDetail = ""; String lz4LibraryName = ""; String bzip2LibraryName = ""; String winutilsPath = null; @@ -85,18 +88,29 @@ public static void main(String[] args) { if (zlibLoaded) { zlibLibraryName = ZlibFactory.getLibraryName(); } + snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && SnappyCodec.isNativeCodeLoaded(); if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) { snappyLibraryName = SnappyCodec.getLibraryName(); } - if (OpensslCipher.getLoadingFailureReason() != null) { - openSslDetail = OpensslCipher.getLoadingFailureReason(); + + isalDetail = ErasureCodeNative.getLoadingFailureReason(); + if (isalDetail != null) { + isalLoaded = false; + } else { + isalDetail = ErasureCodeNative.getLibraryName(); + isalLoaded = true; + } + + openSslDetail = OpensslCipher.getLoadingFailureReason(); + if (openSslDetail != null) { openSslLoaded = false; } else { openSslDetail = OpensslCipher.getLibraryName(); openSslLoaded = true; } + if (lz4Loaded) { lz4LibraryName = Lz4Codec.getLibraryName(); } @@ -125,6 +139,8 @@ public static void main(String[] args) { System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); + System.out.printf("ISA-L: %b %s%n", isalLoaded, isalDetail); + if (Shell.WINDOWS) { System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath); } diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index 0912c6ab29..17149f7bfb 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. 
--> - + Release @@ -79,11 +79,22 @@ $(SnappyInclude);$(IncludePath) $(ZLIB_HOME);$(IncludePath) + + $(CustomIsalPrefix) + $(CustomIsalPrefix)\lib + $(CustomIsalLib) + true + + + + Level3 @@ -92,7 +103,7 @@ true true WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions) - ..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories) + ..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories) CompileAsC 4244 @@ -113,7 +124,7 @@ true true WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions) - ..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories) + ..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories) CompileAsC 4244 @@ -145,11 +156,16 @@ /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" + /D HADOOP_ISAL_LIBRARY=\"isa-l.dll\" src\org\apache\hadoop\io\nativeio;%(AdditionalIncludeDirectories) + + /D HADOOP_ISAL_LIBRARY=\"isa-l.dll\" + + diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c new file mode 100644 index 0000000000..e84df9a731 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "org_apache_hadoop.h" +#include "../include/erasure_code.h" +#include "org_apache_hadoop_io_erasurecode_ErasureCodeNative.h" + +#ifdef UNIX +#include "config.h" +#endif + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary +(JNIEnv *env, jclass myclass) { + char errMsg[1024]; + load_erasurecode_lib(errMsg, sizeof(errMsg)); + if (strlen(errMsg) > 0) { + THROW(env, "java/lang/UnsatisfiedLinkError", errMsg); + } +} + +JNIEXPORT jstring JNICALL +Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName +(JNIEnv *env, jclass myclass) { + char* libName = get_library_name(); + if (libName == NULL) { + libName = "Unavailable"; + } + return (*env)->NewStringUTF(env, libName); +} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h new file mode 100644 index 0000000000..d8ff3a0334 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_apache_hadoop_io_erasurecode_ErasureCodeNative */ + +#ifndef _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative +#define _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative + * Method: loadLibrary + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary + (JNIEnv *, jclass); + +/* + * Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative + * Method: getLibraryName + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName + (JNIEnv *, jclass); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c new file mode 100644 index 0000000000..a6c099ad72 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+#include
+#include
+
+#include "org_apache_hadoop.h"
+#include "../include/gf_util.h"
+#include "../include/erasure_code.h"
+
+#ifdef UNIX
+#include
+#include
+#include
+#include
+
+#include "config.h"
+#endif
+
+#ifdef WINDOWS
+#include
+#endif
+
+/**
+ *  erasure_code.c
+ *  Implements the erasure code utilities on top of the dynamically loaded
+ *  ISA-L library (erasure_code.so). Building this code does not rely on any
+ *  ISA-L source code, but running it relies on successfully loading the
+ *  dynamic library.
+ */
+
+/**
+ * The loaded library handle.
+ */
+static void* libec = NULL;
+
+/**
+ * A helper function to dlsym a 'symbol' from a given library handle.
+ */
+
+#ifdef UNIX
+
+static __attribute__ ((unused))
+void *my_dlsym(void *handle, const char *symbol) {
+  void *func_ptr = dlsym(handle, symbol);
+  return func_ptr;
+}
+
+/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
+#define EC_LOAD_DYNAMIC_SYMBOL(func_ptr, handle, symbol) \
+  if ((func_ptr = my_dlsym(handle, symbol)) == NULL) { \
+    return "Failed to load symbol " symbol; \
+  }
+
+#endif
+
+#ifdef WINDOWS
+
+static FARPROC WINAPI my_dlsym(HMODULE handle, LPCSTR symbol) {
+  FARPROC func_ptr = GetProcAddress(handle, symbol);
+  return func_ptr;
+}
+
+/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
+#define EC_LOAD_DYNAMIC_SYMBOL(func_type, func_ptr, handle, symbol) \
+  if ((func_ptr = (func_type)my_dlsym(handle, symbol)) == NULL) { \
+    return "Failed to load symbol " symbol; \
+  }
+
+#endif
+
+
+#ifdef UNIX
+// For gf_util.h
+static unsigned char (*d_gf_mul)(unsigned char, unsigned char);
+static unsigned char (*d_gf_inv)(unsigned char);
+static void (*d_gf_gen_rs_matrix)(unsigned char *, int, int);
+static void (*d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
+static int (*d_gf_invert_matrix)(unsigned char *, unsigned char *, const int);
+static int (*d_gf_vect_mul)(int, unsigned char *, void *, void *);
+
+// For erasure_code.h
+static void (*d_ec_init_tables)(int, int, unsigned char*, unsigned char*);
+static void (*d_ec_encode_data)(int, int, int, unsigned char*,
+    unsigned char**, unsigned char**);
+static void (*d_ec_encode_data_update)(int, int, int, int, unsigned char*,
+    unsigned char*, unsigned char**);
+#endif
+
+#ifdef WINDOWS
+// For gf_util.h
+typedef unsigned char (__cdecl *__d_gf_mul)(unsigned char, unsigned char);
+static __d_gf_mul d_gf_mul;
+typedef unsigned char (__cdecl *__d_gf_inv)(unsigned char);
+static __d_gf_inv d_gf_inv;
+typedef void (__cdecl *__d_gf_gen_rs_matrix)(unsigned char *, int, int);
+static __d_gf_gen_rs_matrix d_gf_gen_rs_matrix;
+typedef void (__cdecl *__d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
+static __d_gf_gen_cauchy_matrix d_gf_gen_cauchy_matrix;
+typedef int (__cdecl *__d_gf_invert_matrix)(unsigned char *,
+    unsigned char *, const int);
+static __d_gf_invert_matrix d_gf_invert_matrix;
+typedef int (__cdecl *__d_gf_vect_mul)(int, unsigned char *, void *, void *);
+static __d_gf_vect_mul d_gf_vect_mul;
+
+// For erasure_code.h
+typedef void (__cdecl *__d_ec_init_tables)(int, int,
+    unsigned char*, unsigned char*);
+static __d_ec_init_tables d_ec_init_tables;
+typedef void (__cdecl *__d_ec_encode_data)(int, int, int, unsigned char*,
+    unsigned char**, unsigned char**);
+static __d_ec_encode_data d_ec_encode_data;
+typedef void (__cdecl *__d_ec_encode_data_update)(int, int, int, int, unsigned char*,
+    unsigned char*, unsigned char**);
+static __d_ec_encode_data_update d_ec_encode_data_update;
+#endif
+
+static const char* load_functions(void* libec) { +#ifdef UNIX + EC_LOAD_DYNAMIC_SYMBOL(d_gf_mul, libec, "gf_mul"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_inv, libec, "gf_inv"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_invert_matrix, libec, "gf_invert_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(d_gf_vect_mul, libec, "gf_vect_mul"); + + EC_LOAD_DYNAMIC_SYMBOL(d_ec_init_tables, libec, "ec_init_tables"); + EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data, libec, "ec_encode_data"); + EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data_update, libec, "ec_encode_data_update"); +#endif + +#ifdef WINDOWS + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_mul, d_gf_mul, libec, "gf_mul"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_inv, d_gf_inv, libec, "gf_inv"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_rs_matrix, d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_cauchy_matrix, d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_invert_matrix, d_gf_invert_matrix, libec, "gf_invert_matrix"); + EC_LOAD_DYNAMIC_SYMBOL(__d_gf_vect_mul, d_gf_vect_mul, libec, "gf_vect_mul"); + + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_init_tables, d_ec_init_tables, libec, "ec_init_tables"); + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data, d_ec_encode_data, libec, "ec_encode_data"); + EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data_update, d_ec_encode_data_update, libec, "ec_encode_data_update"); +#endif + + return NULL; +} + +void load_erasurecode_lib(char* err, size_t err_len) { + const char* errMsg; + + err[0] = '\0'; + + if (libec != NULL) { + return; + } + + // Load Intel ISA-L + #ifdef UNIX + libec = dlopen(HADOOP_ISAL_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); + if (libec == NULL) { + snprintf(err, err_len, "Failed to load %s (%s)", + HADOOP_ISAL_LIBRARY, dlerror()); + return; + } + // Clear any existing error + dlerror(); + #endif + + #ifdef WINDOWS + libec = LoadLibrary(HADOOP_ISAL_LIBRARY); + if (libec == NULL) { + snprintf(err, err_len, "Failed to load %s", HADOOP_ISAL_LIBRARY); + return; + } + #endif + + errMsg = load_functions(libec); + if (errMsg != NULL) { + snprintf(err, err_len, "Loading functions from ISA-L failed: %s", errMsg); + } +} + +int build_support_erasurecode() { +#ifdef HADOOP_ISAL_LIBRARY + return 1; +#else + return 0; +#endif +} + +const char* get_library_name() { +#ifdef UNIX + Dl_info dl_info; + + if (d_ec_encode_data == NULL) { + return HADOOP_ISAL_LIBRARY; + } + + if(dladdr(d_ec_encode_data, &dl_info)) { + return dl_info.dli_fname; + } +#else + LPTSTR filename = NULL; + + if (libec == NULL) { + return HADOOP_ISAL_LIBRARY; + } + + if (GetModuleFileName(libec, filename, 256) > 0) { + return filename; + } +#endif + + return NULL; +} + +unsigned char h_gf_mul(unsigned char a, unsigned char b) { + return d_gf_mul(a, b); +} + +unsigned char h_gf_inv(unsigned char a) { + return d_gf_inv(a); +} + +void h_gf_gen_rs_matrix(unsigned char *a, int m, int k) { + d_gf_gen_rs_matrix(a, m, k); +} + +void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k) { + d_gf_gen_cauchy_matrix(a, m, k); +} + +int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n) { + return d_gf_invert_matrix(in, out, n); +} + +int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest) { + return d_gf_vect_mul(len, gftbl, src, dest); +} + +void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls) { + d_ec_init_tables(k, rows, a, gftbls); +} + +void 
h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls, + unsigned char **data, unsigned char **coding) { + d_ec_encode_data(len, k, rows, gftbls, data, coding); +} + +void h_ec_encode_data_update(int len, int k, int rows, int vec_i, + unsigned char *gftbls, unsigned char *data, unsigned char **coding) { + d_ec_encode_data_update(len, k, rows, vec_i, gftbls, data, coding); +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h new file mode 100644 index 0000000000..123085ecb5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ERASURE_CODE_H_ +#define _ERASURE_CODE_H_ + +#include + +/** + * Interface to functions supporting erasure code encode and decode. + * + * This file defines the interface to optimized functions used in erasure + * codes. Encode and decode of erasures in GF(2^8) are made by calculating the + * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a + * set of coefficients. Values for the coefficients are determined by the type + * of erasure code. Using a general dot product means that any sequence of + * coefficients may be used including erasure codes based on random + * coefficients. + * Multiple versions of dot product are supplied to calculate 1-6 output + * vectors in one pass. + * Base GF multiply and divide functions can be sped up by defining + * GF_LARGE_TABLES at the expense of memory size. + * + */ + +/** + * Return 0 if not support, 1 otherwise. + */ +int build_support_erasurecode(); + +/** + * Get the library name possibly of full path. + */ +const char* get_library_name(); + +/** + * Initialize and load erasure code library, returning error message if any. + * + * @param err The err message buffer. + * @param err_len The length of the message buffer. + */ +void load_erasurecode_lib(char* err, size_t err_len); + +/** + * Initialize tables for fast Erasure Code encode and decode. + * + * Generates the expanded tables needed for fast encode or decode for erasure + * codes on blocks of data. 32bytes is generated for each input coefficient. + * + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param a Pointer to sets of arrays of input coefficients used to encode + * or decode data. + * @param gftbls Pointer to start of space for concatenated output tables + * generated from input coefficients. 
Must be of size 32*k*rows. + * @returns none + */ +void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); + +/** + * Generate or decode erasure codes on blocks of data, runs appropriate version. + * + * Given a list of source data blocks, generate one or multiple blocks of + * encoded data as specified by a matrix of GF(2^8) coefficients. When given a + * suitable set of coefficients, this function will perform the fast generation + * or decoding of Reed-Solomon type erasure codes. + * + * This function determines what instruction sets are enabled and + * selects the appropriate version at runtime. + * + * @param len Length of each block of data (vector) of source or dest data. + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*k*rows + * @param data Array of pointers to source input buffers. + * @param coding Array of pointers to coded output buffers. + * @returns none + */ +void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls, + unsigned char **data, unsigned char **coding); + +/** + * @brief Generate update for encode or decode of erasure codes from single + * source, runs appropriate version. + * + * Given one source data block, update one or multiple blocks of encoded data as + * specified by a matrix of GF(2^8) coefficients. When given a suitable set of + * coefficients, this function will perform the fast generation or decoding of + * Reed-Solomon type erasure codes from one input source at a time. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param len Length of each block of data (vector) of source or dest data. + * @param k The number of vector sources or rows in the generator matrix + * for coding. + * @param rows The number of output vectors to concurrently encode/decode. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*k*rows + * @param data Pointer to single input source used to update output parity. + * @param coding Array of pointers to coded output buffers. + * @returns none + */ +void h_ec_encode_data_update(int len, int k, int rows, int vec_i, + unsigned char *gftbls, unsigned char *data, unsigned char **coding); + +#endif //_ERASURE_CODE_H_ diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h new file mode 100644 index 0000000000..2be8328458 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GF_UTIL_H +#define _GF_UTIL_H + +/** + * gf_util.h + * Interface to functions for vector (block) multiplication in GF(2^8). + * + * This file defines the interface to routines used in fast RAID rebuild and + * erasure codes. + */ + + +/** + * Single element GF(2^8) multiply. + * + * @param a Multiplicand a + * @param b Multiplicand b + * @returns Product of a and b in GF(2^8) + */ +unsigned char h_gf_mul(unsigned char a, unsigned char b); + +/** + * Single element GF(2^8) inverse. + * + * @param a Input element + * @returns Field element b such that a x b = {1} + */ +unsigned char h_gf_inv(unsigned char a); + +/** + * Generate a matrix of coefficients to be used for encoding. + * + * Vandermonde matrix example of encoding coefficients where high portion of + * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)} + * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in + * erasure encoding but does not guarantee invertable for every sub matrix. For + * large k it is possible to find cases where the decode matrix chosen from + * sources and parity not in erasure are not invertable. Users may want to + * adjust for k > 5. + * + * @param a [mxk] array to hold coefficients + * @param m number of rows in matrix corresponding to srcs + parity. + * @param k number of columns in matrix corresponding to srcs. + * @returns none + */ +void h_gf_gen_rs_matrix(unsigned char *a, int m, int k); + +/** + * Generate a Cauchy matrix of coefficients to be used for encoding. + * + * Cauchy matrix example of encoding coefficients where high portion of matrix + * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, + * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable. + * + * @param a [mxk] array to hold coefficients + * @param m number of rows in matrix corresponding to srcs + parity. + * @param k number of columns in matrix corresponding to srcs. + * @returns none + */ +void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k); + +/** + * Invert a matrix in GF(2^8) + * + * @param in input matrix + * @param out output matrix such that [in] x [out] = [I] - identity matrix + * @param n size of matrix [nxn] + * @returns 0 successful, other fail on singular input matrix + */ +int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); + +/** + * GF(2^8) vector multiply by constant, runs appropriate version. + * + * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C + * is a single field element in GF(2^8). Can be used for RAID6 rebuild + * and partial write functions. Function requires pre-calculation of a + * 32-element constant array based on constant C. gftbl(C) = {C{00}, + * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. + * Len and src must be aligned to 32B. + * + * This function determines what instruction sets are enabled + * and selects the appropriate version at runtime. + * + * @param len Length of vector in bytes. Must be aligned to 32B. + * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C. 
+ * @param src Pointer to src data array. Must be aligned to 32B. + * @param dest Pointer to destination data array. Must be aligned to 32B. + * @returns 0 pass, other fail + */ +int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest); + + +#endif //_GF_UTIL_H diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c index 3625112311..ae8263aac6 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c @@ -49,6 +49,16 @@ JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSup #endif } +JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsIsal + (JNIEnv *env, jclass clazz) +{ +#ifdef HADOOP_ISAL_LIBRARY + return JNI_TRUE; +#else + return JNI_FALSE; +#endif +} + JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName (JNIEnv *env, jclass clazz) { diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c new file mode 100644 index 0000000000..9817a7685b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c @@ -0,0 +1,310 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This is a lightweight version of the same file in Intel ISA-L library to test + * and verify the basic functions of ISA-L integration. Note it's not serving as + * a complete ISA-L library test nor as any sample to write an erasure coder + * using the library. A sample is to be written and provided separately. 
+ */ + +#include "org_apache_hadoop.h" +#include "erasure_code.h" +#include "gf_util.h" + +#include +#include +#include + +#define TEST_LEN 8192 +#define TEST_SOURCES 127 +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES +#define TEST_SEED 11 + +static void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +static void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +static void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char *src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + if (err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! 
Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (h_gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= h_gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + char err[256]; + size_t err_len = sizeof(err); + int re, i, j, p, m, k; + int nerrs, nsrcerrs; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + if (0 == build_support_erasurecode()) { + printf("The native library isn't available, skipping this test\n"); + return 0; // Normal, not an error + } + + load_erasurecode_lib(err, err_len); + if (strlen(err) > 0) { + printf("Loading erasurecode library failed: %s\n", err); + return -1; + } + + printf("Performing erasure code test\n"); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + buffs[i] = malloc(TEST_LEN); + } + + for (i = 0; i < TEST_SOURCES; i++) { + temp_buffs[i] = malloc(TEST_LEN); + } + + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + snprintf(err, err_len, "%s", "allocating test matrix buffers error"); + return -1; + } + + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ h_gf_gen_cauchy_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + h_ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + h_ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + snprintf(err, err_len, "%s", "gf_gen_decode_matrix failed"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + h_ec_init_tables(k, nerrs, decode_matrix, g_tbls); + h_ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + snprintf(err, err_len, "%s", "Error recovery failed"); + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) { + printf(" %d", src_err_list[j]); + } + + printf(" - Index = "); + for (p = 0; p < k; p++) { + printf(" %d", decode_index[p]); + } + + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + + return -1; + } + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 8df266f5f9..81773d66cf 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -41,6 +41,7 @@ UNDEF false false + true false false @@ -332,14 +333,22 @@ mkdir -p $${TARGET_DIR} cd $${LIB_DIR} $$TAR lib* | (cd $${TARGET_DIR}/; $$UNTAR) - if [ "${bundle.snappy}" = "true" ] ; then + if [ "X${bundle.snappy}" = "Xtrue" ] ; then cd "${snappy.lib}" $$TAR *snappy* | (cd $${TARGET_DIR}/; $$UNTAR) fi - if [ "${bundle.openssl}" = "true" ] ; then + if [ "X${bundle.openssl}" = "Xtrue" ] ; then cd "${openssl.lib}" $$TAR *crypto* | (cd $${TARGET_DIR}/; $$UNTAR) fi + if [ "X${bundle.isal}" = "Xtrue" ] ; then + if [ "X${isal.lib}" != "X" ]; then + cd "${isal.lib}" + $$TAR *isa* | (cd $${TARGET_DIR}/; $$UNTAR) + else + echo "The required option isal.lib isn't given, bundling ISA-L skipped" + fi + fi fi BIN_DIR="${BUILD_DIR}/bin" if [ -d $${BIN_DIR} ] ; then diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index efc3a7dd58..7a8425db3b 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1245,6 +1245,7 @@ ${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib} + ${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib}
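
For reference, below is a minimal sketch of how downstream code might probe the new
ErasureCodeNative API introduced by this change. The ErasureCodeNative and
NativeCodeLoader method names are taken from the diff above; the IsalProbe harness
class itself is only hypothetical and not part of the patch.

    import org.apache.hadoop.io.erasurecode.ErasureCodeNative;
    import org.apache.hadoop.util.NativeCodeLoader;

    // Illustrative harness only; not part of the patch.
    public class IsalProbe {
      public static void main(String[] args) {
        // buildSupportsIsal() reports whether libhadoop was compiled with ISA-L support.
        // Guard it behind isNativeCodeLoaded() since it is a native method.
        boolean builtWithIsal = NativeCodeLoader.isNativeCodeLoaded()
            && NativeCodeLoader.buildSupportsIsal();
        System.out.println("libhadoop built with ISA-L: " + builtWithIsal);

        // ErasureCodeNative.isNativeCodeLoaded() is true only if the ISA-L dynamic
        // library was also found and loaded at runtime.
        if (ErasureCodeNative.isNativeCodeLoaded()) {
          System.out.println("ISA-L loaded from: " + ErasureCodeNative.getLibraryName());
        } else {
          System.out.println("ISA-L unavailable: "
              + ErasureCodeNative.getLoadingFailureReason());
        }

        // Callers that require the native coders can fail fast instead:
        // ErasureCodeNative.checkNativeCodeLoaded();  // throws RuntimeException if not loaded
      }
    }

This mirrors the new "ISA-L" line that the updated NativeLibraryChecker prints; a build
that bundles the library would typically be produced with the options documented in the
BUILDING.txt section above (for example -Drequire.isal together with -Disal.lib and
-Dbundle.isal).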