From 1fafb1e1e0bf76c124fdf7db3402cc576668428d Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Sun, 11 Aug 2013 05:51:34 +0000 Subject: [PATCH] HADOOP-9802. Support Snappy codec on Windows. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1512872 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 + hadoop-common-project/hadoop-common/pom.xml | 11 ++++ .../src/main/native/native.vcxproj | 28 +++++++++- .../io/compress/snappy/SnappyCompressor.c | 55 ++++++++++++++++--- .../io/compress/snappy/SnappyDecompressor.c | 34 +++++++++++- .../org_apache_hadoop_io_compress_snappy.h | 4 ++ .../org/apache/hadoop/util/NativeCodeLoader.c | 40 +++++--------- .../src/main/winutils/include/winutils.h | 4 +- .../src/main/winutils/libwinutils.c | 48 ++++++++++++++++ hadoop-project-dist/pom.xml | 7 +++ hadoop-project/pom.xml | 3 +- 11 files changed, 195 insertions(+), 41 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c7c2633763..5a08f24db7 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -340,6 +340,8 @@ Release 2.1.1-beta - UNRELEASED HADOOP-8814. Replace string equals "" by String#isEmpty(). (Brandon Li via suresh) + HADOOP-9802. Support Snappy codec on Windows. (cnauroth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index e29c3a5dfc..a59157d5d3 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -586,6 +586,13 @@ Windows + + + + + false + true + @@ -670,6 +677,10 @@ /nologo /p:Configuration=Release /p:OutDir=${project.build.directory}/bin/ + /p:CustomSnappyPrefix=${snappy.prefix} + /p:CustomSnappyLib=${snappy.lib} + /p:CustomSnappyInclude=${snappy.include} + /p:RequireSnappy=${require.snappy} diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index 312660285a..724e2a20c8 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. --> - + Release @@ -49,6 +49,21 @@ ..\..\..\target\native\$(Configuration)\ hadoop + + $(CustomSnappyPrefix) + $(CustomSnappyPrefix)\lib + $(CustomSnappyLib) + $(CustomSnappyPrefix) + $(CustomSnappyPrefix)\include + $(CustomSnappyInclude) + true + $(SnappyInclude);$(IncludePath) + + + + Level3 @@ -71,6 +86,12 @@ + + /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" + + + /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" + @@ -79,12 +100,15 @@ - + + /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" + + diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c index c988f85f27..fe827f02de 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c @@ -30,6 +30,10 @@ #include "config.h" #endif // UNIX +#ifdef WINDOWS +#include "winutils.h" +#endif + #include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h" #define JINT_MAX 0x7fffffff @@ -40,11 +44,18 @@ static jfieldID SnappyCompressor_uncompressedDirectBufLen; static jfieldID SnappyCompressor_compressedDirectBuf; static jfieldID SnappyCompressor_directBufferSize; +#ifdef UNIX static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*); +#endif + +#ifdef WINDOWS +typedef snappy_status (__cdecl *__dlsym_snappy_compress)(const char*, size_t, char*, size_t*); +static __dlsym_snappy_compress dlsym_snappy_compress; +#endif JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs (JNIEnv *env, jclass clazz){ - +#ifdef UNIX // Load libsnappy.so void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); if (!libsnappy) { @@ -53,10 +64,25 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso THROW(env, "java/lang/UnsatisfiedLinkError", msg); return; } +#endif + +#ifdef WINDOWS + HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); + if (!libsnappy) { + THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); + return; + } +#endif // Locate the requisite symbols from libsnappy.so +#ifdef UNIX dlerror(); // Clear any existing error LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress"); +#endif + +#ifdef WINDOWS + LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_compress, dlsym_snappy_compress, env, libsnappy, "snappy_compress"); +#endif SnappyCompressor_clazz = (*env)->GetStaticFieldID(env, clazz, "clazz", "Ljava/lang/Class;"); @@ -74,6 +100,9 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect (JNIEnv *env, jobject thisj){ + const char* uncompressed_bytes; + char* compressed_bytes; + snappy_status ret; // Get members of SnappyCompressor jobject clazz = (*env)->GetStaticObjectField(env, thisj, SnappyCompressor_clazz); jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf); @@ -84,7 +113,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso // Get the input direct buffer LOCK_CLASS(env, clazz, "SnappyCompressor"); - const char* uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); + uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); UNLOCK_CLASS(env, clazz, "SnappyCompressor"); if (uncompressed_bytes == 0) { @@ -93,7 +122,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso // Get the output direct buffer LOCK_CLASS(env, clazz, "SnappyCompressor"); - char* compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); + compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); UNLOCK_CLASS(env, clazz, "SnappyCompressor"); if (compressed_bytes == 0) { @@ -102,8 +131,8 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso /* size_t should always be 4 bytes or larger. */ buf_len = (size_t)compressed_direct_buf_len; - snappy_status ret = dlsym_snappy_compress(uncompressed_bytes, - uncompressed_direct_buf_len, compressed_bytes, &buf_len); + ret = dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len, + compressed_bytes, &buf_len); if (ret != SNAPPY_OK){ THROW(env, "java/lang/InternalError", "Could not compress data. Buffer length is too small."); return 0; @@ -128,8 +157,18 @@ Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_getLibraryName(JNIEnv return (*env)->NewStringUTF(env, dl_info.dli_fname); } } -#endif - return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY); -} + return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY); +#endif + +#ifdef WINDOWS + LPWSTR filename = NULL; + GetLibraryName(dlsym_snappy_compress, &filename); + if (filename != NULL) { + return (*env)->NewString(env, filename, (jsize) wcslen(filename)); + } else { + return (*env)->NewStringUTF(env, "Unavailable"); + } +#endif +} #endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c index 43ec58fc77..d1fd13c9e5 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c @@ -37,12 +37,20 @@ static jfieldID SnappyDecompressor_compressedDirectBufLen; static jfieldID SnappyDecompressor_uncompressedDirectBuf; static jfieldID SnappyDecompressor_directBufferSize; +#ifdef UNIX static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); +#endif + +#ifdef WINDOWS +typedef snappy_status (__cdecl *__dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); +static __dlsym_snappy_uncompress dlsym_snappy_uncompress; +#endif JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs (JNIEnv *env, jclass clazz){ // Load libsnappy.so +#ifdef UNIX void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); if (!libsnappy) { char* msg = (char*)malloc(1000); @@ -50,11 +58,27 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres THROW(env, "java/lang/UnsatisfiedLinkError", msg); return; } +#endif + +#ifdef WINDOWS + HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); + if (!libsnappy) { + THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); + return; + } +#endif // Locate the requisite symbols from libsnappy.so +#ifdef UNIX dlerror(); // Clear any existing error LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); +#endif + +#ifdef WINDOWS + LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_uncompress, dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); +#endif + SnappyDecompressor_clazz = (*env)->GetStaticFieldID(env, clazz, "clazz", "Ljava/lang/Class;"); SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, @@ -71,6 +95,9 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect (JNIEnv *env, jobject thisj){ + const char* compressed_bytes = NULL; + char* uncompressed_bytes = NULL; + snappy_status ret; // Get members of SnappyDecompressor jobject clazz = (*env)->GetStaticObjectField(env,thisj, SnappyDecompressor_clazz); jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf); @@ -80,7 +107,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres // Get the input direct buffer LOCK_CLASS(env, clazz, "SnappyDecompressor"); - const char* compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); + compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); UNLOCK_CLASS(env, clazz, "SnappyDecompressor"); if (compressed_bytes == 0) { @@ -89,14 +116,15 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres // Get the output direct buffer LOCK_CLASS(env, clazz, "SnappyDecompressor"); - char* uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); + uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); UNLOCK_CLASS(env, clazz, "SnappyDecompressor"); if (uncompressed_bytes == 0) { return (jint)0; } - snappy_status ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, uncompressed_bytes, &uncompressed_direct_buf_len); + ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, + uncompressed_bytes, &uncompressed_direct_buf_len); if (ret == SNAPPY_BUFFER_TOO_SMALL){ THROW(env, "java/lang/InternalError", "Could not decompress data. Buffer length is too small."); } else if (ret == SNAPPY_INVALID_INPUT){ diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h index 3e99d5d20d..8394efe477 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h @@ -21,7 +21,11 @@ #define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H #include "org_apache_hadoop.h" + +#ifdef UNIX #include +#endif + #include #include #include diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c index 146e160091..d03050c591 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c @@ -23,6 +23,10 @@ #include "config.h" #endif // UNIX +#ifdef WINDOWS +#include "winutils.h" +#endif + #include JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsSnappy @@ -47,32 +51,16 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibrar #endif #ifdef WINDOWS - SIZE_T ret = 0; - DWORD size = MAX_PATH; LPWSTR filename = NULL; - HMODULE mod = NULL; - DWORD err = ERROR_SUCCESS; - - MEMORY_BASIC_INFORMATION mbi; - ret = VirtualQuery(Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName, - &mbi, sizeof(mbi)); - if (ret == 0) goto cleanup; - mod = mbi.AllocationBase; - - do { - filename = (LPWSTR) realloc(filename, size * sizeof(WCHAR)); - if (filename == NULL) goto cleanup; - GetModuleFileName(mod, filename, size); - size <<= 1; - err = GetLastError(); - } while (err == ERROR_INSUFFICIENT_BUFFER); - - if (err != ERROR_SUCCESS) goto cleanup; - - return (*env)->NewString(env, filename, (jsize) wcslen(filename)); - -cleanup: - if (filename != NULL) free(filename); - return (*env)->NewStringUTF(env, "Unavailable"); + GetLibraryName(Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName, + &filename); + if (filename != NULL) + { + return (*env)->NewString(env, filename, (jsize) wcslen(filename)); + } + else + { + return (*env)->NewStringUTF(env, "Unavailable"); + } #endif } diff --git a/hadoop-common-project/hadoop-common/src/main/winutils/include/winutils.h b/hadoop-common-project/hadoop-common/src/main/winutils/include/winutils.h index 753fb849db..1c0007a6da 100644 --- a/hadoop-common-project/hadoop-common/src/main/winutils/include/winutils.h +++ b/hadoop-common-project/hadoop-common/src/main/winutils/include/winutils.h @@ -153,4 +153,6 @@ DWORD ChangeFileModeByMask(__in LPCWSTR path, INT mode); DWORD GetLocalGroupsForUser(__in LPCWSTR user, __out LPLOCALGROUP_USERS_INFO_0 *groups, __out LPDWORD entries); -BOOL EnablePrivilege(__in LPCWSTR privilegeName); \ No newline at end of file +BOOL EnablePrivilege(__in LPCWSTR privilegeName); + +void GetLibraryName(__in LPCVOID lpAddress, __out LPWSTR *filename); diff --git a/hadoop-common-project/hadoop-common/src/main/winutils/libwinutils.c b/hadoop-common-project/hadoop-common/src/main/winutils/libwinutils.c index 3e0768a4b1..391247fccd 100644 --- a/hadoop-common-project/hadoop-common/src/main/winutils/libwinutils.c +++ b/hadoop-common-project/hadoop-common/src/main/winutils/libwinutils.c @@ -1709,3 +1709,51 @@ void ReportErrorCode(LPCWSTR func, DWORD err) } if (msg != NULL) LocalFree(msg); } + +//---------------------------------------------------------------------------- +// Function: GetLibraryName +// +// Description: +// Given an address, get the file name of the library from which it was loaded. +// +// Returns: +// None +// +// Notes: +// - The function allocates heap memory and points the filename out parameter to +// the newly allocated memory, which will contain the name of the file. +// +// - If there is any failure, then the function frees the heap memory it +// allocated and sets the filename out parameter to NULL. +// +void GetLibraryName(LPCVOID lpAddress, LPWSTR *filename) +{ + SIZE_T ret = 0; + DWORD size = MAX_PATH; + HMODULE mod = NULL; + DWORD err = ERROR_SUCCESS; + + MEMORY_BASIC_INFORMATION mbi; + ret = VirtualQuery(lpAddress, &mbi, sizeof(mbi)); + if (ret == 0) goto cleanup; + mod = mbi.AllocationBase; + + do { + *filename = (LPWSTR) realloc(*filename, size * sizeof(WCHAR)); + if (*filename == NULL) goto cleanup; + GetModuleFileName(mod, *filename, size); + size <<= 1; + err = GetLastError(); + } while (err == ERROR_INSUFFICIENT_BUFFER); + + if (err != ERROR_SUCCESS) goto cleanup; + + return; + +cleanup: + if (*filename != NULL) + { + free(*filename); + *filename = NULL; + } +} diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 342fbfec33..103bcc52a0 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -40,6 +40,7 @@ UNDEF false + false @@ -355,6 +356,12 @@ mkdir -p $${TARGET_BIN_DIR} cd $${BIN_DIR} $$TAR * | (cd $${TARGET_BIN_DIR}/; $$UNTAR) + if [ "${bundle.snappy.in.bin}" = "true" ] ; then + if [ "${bundle.snappy}" = "true" ] ; then + cd ${snappy.lib} + $$TAR *snappy* | (cd $${TARGET_BIN_DIR}/; $$UNTAR) + fi + fi fi diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4b4ba629df..dc928d2748 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -892,6 +892,7 @@ file:/dev/urandom + true @@ -901,7 +902,7 @@ - ${env.PATH};${hadoop.common.build.dir}/bin + ${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib}