HADOOP-14451. Deadlock in NativeIO #3961

Closed
wants to merge 1 commit
NativeIO.java
@@ -331,11 +331,11 @@ public boolean verifyCanMlock() {
if (NativeCodeLoader.isNativeCodeLoaded()) {
try {
Configuration conf = new Configuration();
workaroundNonThreadSafePasswdCalls = conf.getBoolean(
WORKAROUND_NON_THREADSAFE_CALLS_KEY,
WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
boolean workaroundNonThreadSafePasswdCalls = conf.getBoolean(
WORKAROUND_NON_THREADSAFE_CALLS_KEY,
WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);

initNative();
initNativePosix(workaroundNonThreadSafePasswdCalls);
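// The workaround flag is now passed to the native initializer as an argument
// instead of being read back from a NativeIO static field by the JNI code,
// so the POSIX initialization path no longer reaches into the outer
// NativeIO class for this flag.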
nativeLoaded = true;

cacheTimeout = conf.getLong(
@@ -354,6 +354,9 @@ public boolean verifyCanMlock() {
}
}

/** Initialize the JNI method ID and class ID cache. */
private static native void initNativePosix(boolean doThreadsafeWorkaround);

/**
* Return true if the JNI-based native IO extensions are available.
*/
@@ -668,9 +671,6 @@ public static native void munmap(long addr, long length)
throws IOException;
}

private static boolean workaroundNonThreadSafePasswdCalls = false;


public static class Windows {
// Flags for CreateFile() call on Windows
public static final long GENERIC_READ = 0x80000000L;
@@ -809,7 +809,9 @@ public static boolean access(String path, AccessRight desiredAccess)
static {
if (NativeCodeLoader.isNativeCodeLoaded()) {
try {
initNative();
initNativeWindows(false);
// As of now there is no difference between the initNative()
// and initNativeWindows() native implementations.
nativeLoaded = true;
} catch (Throwable t) {
// This can happen if the user has an older version of libhadoop.so
@@ -819,6 +821,10 @@ public static boolean access(String path, AccessRight desiredAccess)
}
}
}

/** Initialize the JNI method ID and class ID cache. */
private static native void initNativeWindows(
boolean doThreadsafeWorkaround);
}

private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class);
@@ -828,7 +834,7 @@ public static boolean access(String path, AccessRight desiredAccess)
static {
if (NativeCodeLoader.isNativeCodeLoaded()) {
try {
initNative();
initNative(false);
nativeLoaded = true;
} catch (Throwable t) {
// This can happen if the user has an older version of libhadoop.so
@@ -847,7 +853,7 @@ public static boolean isAvailable() {
}

/** Initialize the JNI method ID and class ID cache */
private static native void initNative();
private static native void initNative(boolean doThreadsafeWorkaround);

/**
* Get the maximum number of bytes that can be locked into memory at any
NativeIO.c
@@ -103,24 +103,6 @@ extern void throw_ioe(JNIEnv* env, int errnum);
static ssize_t get_pw_buflen();
#endif

/**
* Returns non-zero if the user has specified that the system
* has non-threadsafe implementations of getpwuid_r or getgrgid_r.
**/
static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) {
jboolean result;
jfieldID needs_workaround_field = (*env)->GetStaticFieldID(
env, clazz,
"workaroundNonThreadSafePasswdCalls",
"Z");
PASS_EXCEPTIONS_RET(env, 0);
assert(needs_workaround_field);

result = (*env)->GetStaticBooleanField(
env, clazz, needs_workaround_field);
return result;
}

/**
* Sets a static boolean field to the specified value.
*/
@@ -201,10 +183,9 @@ static void consts_init(JNIEnv *env) {
}
#endif

static void stat_init(JNIEnv *env, jclass nativeio_class) {
static void stat_init(JNIEnv *env) {
jclass clazz = NULL;
jclass obj_class = NULL;
jmethodID obj_ctor = NULL;
if (stat_ctor2 != NULL) return; //Already inited
// Init Stat
clazz = (*env)->FindClass(env, NATIVE_IO_STAT_CLASS);
if (!clazz) {
@@ -224,6 +205,20 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
if (!stat_ctor2) {
return; // exception has been raised
}
}

static void stat_deinit(JNIEnv *env) {
if (stat_clazz != NULL) {
(*env)->DeleteGlobalRef(env, stat_clazz);
stat_clazz = NULL;
}
}

static void workaround_non_threadsafe_calls_init(JNIEnv *env){
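/*
 * Creates the global pw_lock_object that the native getpwuid_r/getgrgid_r
 * callers synchronize on when the configured workaround is enabled. The
 * enable/disable decision is now made on the Java side and passed in as a
 * parameter, so this code no longer reads a NativeIO static field while a
 * class may still be initializing.
 */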
jclass obj_class = NULL;
jmethodID obj_ctor = NULL;
if (pw_lock_object != NULL) return; // Already inited

obj_class = (*env)->FindClass(env, "java/lang/Object");
if (!obj_class) {
return; // exception has been raised
@@ -233,28 +228,21 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
if (!obj_ctor) {
return; // exception has been raised
}

if (workaround_non_threadsafe_calls(env, nativeio_class)) {
pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
PASS_EXCEPTIONS(env);
pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);

PASS_EXCEPTIONS(env);
}
pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
PASS_EXCEPTIONS(env);
pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);
PASS_EXCEPTIONS(env);
}

static void stat_deinit(JNIEnv *env) {
if (stat_clazz != NULL) {
(*env)->DeleteGlobalRef(env, stat_clazz);
stat_clazz = NULL;
}
static void workaround_non_threadsafe_calls_deinit(JNIEnv *env) {
if (pw_lock_object != NULL) {
(*env)->DeleteGlobalRef(env, pw_lock_object);
pw_lock_object = NULL;
}
}

static void nioe_init(JNIEnv *env) {
if (nioe_ctor != NULL) return; // Already inited
// Init NativeIOException
nioe_clazz = (*env)->FindClass(
env, "org/apache/hadoop/io/nativeio/NativeIOException");
@@ -349,17 +337,78 @@ static void pmem_region_deinit(JNIEnv *env) {
*/
JNIEXPORT void JNICALL
Java_org_apache_hadoop_io_nativeio_NativeIO_initNative(
JNIEnv *env, jclass clazz) {
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
nioe_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
fd_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
#ifdef UNIX
errno_enum_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
#endif
if (doThreadsafeWorkaround) {
workaround_non_threadsafe_calls_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
}
return;
error:
// these are all idempotent and safe to call even if the
// class wasn't initialized yet
nioe_deinit(env);
fd_deinit(env);
#ifdef UNIX
errno_enum_deinit(env);
#endif
if (doThreadsafeWorkaround) {
workaround_non_threadsafe_calls_deinit(env);
}
}

/*
* private static native void initNativeWindows(boolean doThreadsafeWorkaround);
*/
JNIEXPORT void JNICALL
Java_org_apache_hadoop_io_nativeio_NativeIO_00024Windows_initNativeWindows(
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
nioe_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
fd_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
if (doThreadsafeWorkaround) {
workaround_non_threadsafe_calls_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
}
return;
error:
// these are all idempotent and safe to call even if the
// class wasn't initialized yet
nioe_deinit(env);
fd_deinit(env);
if (doThreadsafeWorkaround) {
workaround_non_threadsafe_calls_deinit(env);
}
}

/*
* private static native void initNativePosix(boolean doThreadsafeWorkaround);
*/
JNIEXPORT void JNICALL
Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_initNativePosix(
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
#ifdef UNIX
consts_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
#endif
stat_init(env, clazz);
stat_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
nioe_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
fd_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
if (doThreadsafeWorkaround) {
workaround_non_threadsafe_calls_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
}
#ifdef UNIX
errno_enum_init(env);
PASS_EXCEPTIONS_GOTO(env, error);
TestNativeIoInit.java
@@ -0,0 +1,88 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.nativeio;

import static org.junit.Assume.assumeTrue;

import java.io.IOException;

import org.apache.hadoop.fs.Path;

import org.junit.Test;

/**
* Separate class to ensure forked tests load the static blocks again.
*/
public class TestNativeIoInit {

/**
* Refer to HADOOP-14451.
* Scenario:
* 1. One thread calls a static method of NativeIO, which loads the static
* block of NativeIO.
* 2. Second thread calls a static method of NativeIO.POSIX, which loads the
* static block of the NativeIO.POSIX class.
* <p>
* Expected: Loading these two static blocks separately should not result in
* deadlock.
*/
@Test(timeout = 10000)
public void testDeadlockLinux() throws Exception {
Thread one = new Thread() {
@Override
public void run() {
NativeIO.isAvailable();
}
};
Thread two = new Thread() {
@Override
public void run() {
NativeIO.POSIX.isAvailable();
}
};
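// Start both threads so that NativeIO and NativeIO.POSIX class
// initialization run concurrently; before HADOOP-14451 this could
// deadlock on the two class-initialization locks.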
two.start();
one.start();
one.join();
two.join();
}

@Test(timeout = 10000)
public void testDeadlockWindows() throws Exception {
assumeTrue("Expected windows", Path.WINDOWS);
Thread one = new Thread() {
@Override
public void run() {
NativeIO.isAvailable();
}
};
Thread two = new Thread() {
@Override
public void run() {
try {
NativeIO.Windows.extendWorkingSetSize(100);
} catch (IOException e) {
// ignored
}
}
};
two.start();
one.start();
one.join();
two.join();
}
}
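
For context on the failure mode the new test guards against: the hang is the
standard JVM class-initialization deadlock, in which two classes force each
other's initialization from different threads. Below is a minimal standalone
sketch of that pattern (illustration only, not part of this patch; the class
names are hypothetical). With the sleeps in place it hangs reliably when run.

public class ClassInitDeadlockSketch {

  static class A {
    static {
      pause();    // widen the race window
      B.touch();  // A's static initializer forces B to initialize
    }
    static void touch() { }
  }

  static class B {
    static {
      pause();
      A.touch();  // B's static initializer forces A to initialize
    }
    static void touch() { }
  }

  static void pause() {
    try {
      Thread.sleep(200);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }

  public static void main(String[] args) throws InterruptedException {
    Thread one = new Thread(() -> A.touch());
    Thread two = new Thread(() -> B.touch());
    one.start();
    two.start();
    // Each thread holds its own class-initialization lock and waits for the
    // other's, so neither join() ever returns. NativeIO hit the same pattern
    // when one class's JNI init touched members of the sibling class.
    one.join();
    two.join();
  }
}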