From 0f51d2a4ec17bad754beb17048409811a151be53 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Mon, 18 Mar 2024 10:53:21 +0530 Subject: [PATCH] HADOOP-14451. Deadlock in NativeIO (#6632) --- .../apache/hadoop/io/nativeio/NativeIO.java | 26 ++-- .../org/apache/hadoop/io/nativeio/NativeIO.c | 126 ++++++++++++------ .../hadoop/io/nativeio/TestNativeIoInit.java | 87 ++++++++++++ 3 files changed, 191 insertions(+), 48 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIoInit.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java index 5cf820c50c..0a469e3024 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java @@ -220,6 +220,9 @@ public long getLength() { } } + /** Initialize the JNI method ID and class ID cache. */ + private static native void initNativePosix(boolean doThreadsafeWorkaround); + /** * JNI wrapper of persist memory operations. */ @@ -331,11 +334,11 @@ public boolean verifyCanMlock() { if (NativeCodeLoader.isNativeCodeLoaded()) { try { Configuration conf = new Configuration(); - workaroundNonThreadSafePasswdCalls = conf.getBoolean( - WORKAROUND_NON_THREADSAFE_CALLS_KEY, - WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT); + boolean workaroundNonThreadSafePasswdCalls = conf.getBoolean( + WORKAROUND_NON_THREADSAFE_CALLS_KEY, + WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT); - initNative(); + initNativePosix(workaroundNonThreadSafePasswdCalls); nativeLoaded = true; cacheTimeout = conf.getLong( @@ -679,9 +682,6 @@ public static native void munmap(long addr, long length) throws IOException; } - private static boolean workaroundNonThreadSafePasswdCalls = false; - - public static class Windows { // Flags for CreateFile() call on Windows public static final long GENERIC_READ = 0x80000000L; @@ -833,7 +833,9 @@ public static boolean access(String path, AccessRight desiredAccess) static { if (NativeCodeLoader.isNativeCodeLoaded()) { try { - initNative(); + initNativeWindows(false); + // As of now there is no change between initNative() + // and initNativeWindows() native impls. nativeLoaded = true; } catch (Throwable t) { // This can happen if the user has an older version of libhadoop.so @@ -843,6 +845,10 @@ public static boolean access(String path, AccessRight desiredAccess) } } } + + /** Initialize the JNI method ID and class ID cache. */ + private static native void initNativeWindows( + boolean doThreadsafeWorkaround); } private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class); @@ -852,7 +858,7 @@ public static boolean access(String path, AccessRight desiredAccess) static { if (NativeCodeLoader.isNativeCodeLoaded()) { try { - initNative(); + initNative(false); nativeLoaded = true; } catch (Throwable t) { // This can happen if the user has an older version of libhadoop.so @@ -871,7 +877,7 @@ public static boolean isAvailable() { } /** Initialize the JNI method ID and class ID cache */ - private static native void initNative(); + private static native void initNative(boolean doThreadsafeWorkaround); /** * Get the maximum number of bytes that can be locked into memory at any diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c index 1d7c508d85..f720d99377 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c @@ -103,24 +103,6 @@ extern void throw_ioe(JNIEnv* env, int errnum); static ssize_t get_pw_buflen(); #endif -/** - * Returns non-zero if the user has specified that the system - * has non-threadsafe implementations of getpwuid_r or getgrgid_r. - **/ -static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) { - jboolean result; - jfieldID needs_workaround_field = (*env)->GetStaticFieldID( - env, clazz, - "workaroundNonThreadSafePasswdCalls", - "Z"); - PASS_EXCEPTIONS_RET(env, 0); - assert(needs_workaround_field); - - result = (*env)->GetStaticBooleanField( - env, clazz, needs_workaround_field); - return result; -} - /** * Sets a static boolean field to the specified value. */ @@ -201,10 +183,9 @@ static void consts_init(JNIEnv *env) { } #endif -static void stat_init(JNIEnv *env, jclass nativeio_class) { +static void stat_init(JNIEnv *env) { jclass clazz = NULL; - jclass obj_class = NULL; - jmethodID obj_ctor = NULL; + if (stat_ctor2 != NULL) return; //Already inited // Init Stat clazz = (*env)->FindClass(env, NATIVE_IO_STAT_CLASS); if (!clazz) { @@ -224,6 +205,20 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) { if (!stat_ctor2) { return; // exception has been raised } +} + +static void stat_deinit(JNIEnv *env) { + if (stat_clazz != NULL) { + (*env)->DeleteGlobalRef(env, stat_clazz); + stat_clazz = NULL; + } +} + +static void workaround_non_threadsafe_calls_init(JNIEnv *env){ + jclass obj_class = NULL; + jmethodID obj_ctor = NULL; + if (pw_lock_object != NULL) return; // Already inited + obj_class = (*env)->FindClass(env, "java/lang/Object"); if (!obj_class) { return; // exception has been raised @@ -233,21 +228,13 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) { if (!obj_ctor) { return; // exception has been raised } - - if (workaround_non_threadsafe_calls(env, nativeio_class)) { - pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor); - PASS_EXCEPTIONS(env); - pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object); - - PASS_EXCEPTIONS(env); - } + pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor); + PASS_EXCEPTIONS(env); + pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object); + PASS_EXCEPTIONS(env); } -static void stat_deinit(JNIEnv *env) { - if (stat_clazz != NULL) { - (*env)->DeleteGlobalRef(env, stat_clazz); - stat_clazz = NULL; - } +static void workaround_non_threadsafe_calls_deinit(JNIEnv *env){ if (pw_lock_object != NULL) { (*env)->DeleteGlobalRef(env, pw_lock_object); pw_lock_object = NULL; @@ -255,6 +242,7 @@ static void stat_deinit(JNIEnv *env) { } static void nioe_init(JNIEnv *env) { + if (nioe_ctor != NULL) return; // Already inited // Init NativeIOException nioe_clazz = (*env)->FindClass( env, "org/apache/hadoop/io/nativeio/NativeIOException"); @@ -349,17 +337,53 @@ static void pmem_region_deinit(JNIEnv *env) { */ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_nativeio_NativeIO_initNative( - JNIEnv *env, jclass clazz) { + JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) { + nioe_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + fd_init(env); + PASS_EXCEPTIONS_GOTO(env, error); +#ifdef UNIX + errno_enum_init(env); + PASS_EXCEPTIONS_GOTO(env, error); +#endif + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + } + return; +error: + // these are all idempotent and safe to call even if the + // class wasn't inited yet + nioe_deinit(env); + fd_deinit(env); +#ifdef UNIX + errno_enum_deinit(env); +#endif + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_deinit(env); + } +} + +/* + * private static native void initNativePosix(); + */ +JNIEXPORT void JNICALL +Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_initNativePosix( + JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) { #ifdef UNIX consts_init(env); PASS_EXCEPTIONS_GOTO(env, error); #endif - stat_init(env, clazz); + stat_init(env); PASS_EXCEPTIONS_GOTO(env, error); nioe_init(env); PASS_EXCEPTIONS_GOTO(env, error); fd_init(env); PASS_EXCEPTIONS_GOTO(env, error); + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + } #ifdef UNIX errno_enum_init(env); PASS_EXCEPTIONS_GOTO(env, error); @@ -373,17 +397,43 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_initNative( error: // these are all idempodent and safe to call even if the // class wasn't initted yet -#ifdef UNIX stat_deinit(env); #ifdef HADOOP_PMDK_LIBRARY pmem_region_deinit(env); -#endif #endif nioe_deinit(env); fd_deinit(env); #ifdef UNIX errno_enum_deinit(env); #endif + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_deinit(env); + } +} + +/* + * private static native void initNativeWindows(); + */ +JNIEXPORT void JNICALL +Java_org_apache_hadoop_io_nativeio_NativeIO_00024Windows_initNativeWindows( + JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) { + nioe_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + fd_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_init(env); + PASS_EXCEPTIONS_GOTO(env, error); + } + return; +error: + // these are all idempodent and safe to call even if the + // class wasn't initted yet + nioe_deinit(env); + fd_deinit(env); + if (doThreadsafeWorkaround) { + workaround_non_threadsafe_calls_deinit(env); + } } /* diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIoInit.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIoInit.java new file mode 100644 index 0000000000..bdc295f252 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIoInit.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.nativeio; + +import static org.junit.Assume.assumeTrue; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.junit.Test; + +/** + * Separate class to ensure forked Tests load the static blocks again. + */ +public class TestNativeIoInit { + + /** + * Refer HADOOP-14451 + * Scenario: + * 1. One thread calls a static method of NativeIO, which loads static block + * of NativeIo. + * 2. Second thread calls a static method of NativeIo.POSIX, which loads a + * static block of NativeIO.POSIX class + *

+ * Expected: Loading these two static blocks separately should not result in + * deadlock. + */ + @Test(timeout = 10000) + public void testDeadlockLinux() throws Exception { + Thread one = new Thread() { + @Override + public void run() { + NativeIO.isAvailable(); + } + }; + Thread two = new Thread() { + @Override + public void run() { + NativeIO.POSIX.isAvailable(); + } + }; + two.start(); + one.start(); + one.join(); + two.join(); + } + + @Test(timeout = 10000) + public void testDeadlockWindows() throws Exception { + assumeTrue("Expected windows", Path.WINDOWS); + Thread one = new Thread() { + @Override + public void run() { + NativeIO.isAvailable(); + } + }; + Thread two = new Thread() { + @Override + public void run() { + try { + NativeIO.Windows.extendWorkingSetSize(100); + } catch (IOException e) { + //igored + } + } + }; + two.start(); + one.start(); + one.join(); + two.join(); + } +}