Skip to content

Commit 76592db

Browse files
authored
Close eventfd shutdown/wakeup race by closely tracking epoll edges (#9586)
Motivation This is another iteration of #9476. Modifications Instead of maintaining a count of all writes performed and then using reads during shutdown to ensure all are accounted for, just set a flag after each write and don't reset it until the corresponding event has been returned from epoll_wait. This requires that while a write is still pending we don't reset wakenUp, i.e. continue to block writes from the wakeup() method. Result Race condition eliminated. Fixes #9362 Co-authored-by: Norman Maurer <norman_maurer@apple.com>
1 parent 0a2d85f commit 76592db

File tree

3 files changed

+88
-59
lines changed

3 files changed

+88
-59
lines changed

transport-native-epoll/src/main/c/netty_epoll_native.c

+16-41
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,8 @@ static void netty_epoll_native_timerFdSetTime(JNIEnv* env, jclass clazz, jint ti
197197
}
198198
}
199199

200-
static jint netty_epoll_native_epollWaitNoTimeout(JNIEnv* env, jclass clazz, jint efd, jlong address, jint len, jboolean immediatePoll) {
200+
static jint netty_epoll_native_epollWait(JNIEnv* env, jclass clazz, jint efd, jlong address, jint len, jint timeout) {
201201
struct epoll_event *ev = (struct epoll_event*) (intptr_t) address;
202-
const int timeout = immediatePoll ? 0 : -1;
203202
int result, err;
204203

205204
do {
@@ -213,47 +212,23 @@ static jint netty_epoll_native_epollWaitNoTimeout(JNIEnv* env, jclass clazz, jin
213212

214213
// This method is deprecated!
215214
static jint netty_epoll_native_epollWait0(JNIEnv* env, jclass clazz, jint efd, jlong address, jint len, jint timerFd, jint tvSec, jint tvNsec) {
216-
struct epoll_event *ev = (struct epoll_event*) (intptr_t) address;
217-
int result, err;
218-
219215
if (tvSec == 0 && tvNsec == 0) {
220216
// Zeros = poll (aka return immediately).
221-
do {
222-
result = epoll_wait(efd, ev, len, 0);
223-
if (result >= 0) {
224-
return result;
225-
}
226-
} while((err = errno) == EINTR);
227-
} else {
228-
// only reschedule the timer if there is a newer event.
229-
// -1 is a special value used by EpollEventLoop.
230-
if (tvSec != ((jint) -1) && tvNsec != ((jint) -1)) {
231-
struct itimerspec ts;
232-
memset(&ts.it_interval, 0, sizeof(struct timespec));
233-
ts.it_value.tv_sec = tvSec;
234-
ts.it_value.tv_nsec = tvNsec;
235-
if (timerfd_settime(timerFd, 0, &ts, NULL) < 0) {
236-
netty_unix_errors_throwChannelExceptionErrorNo(env, "timerfd_settime() failed: ", errno);
237-
return -1;
238-
}
239-
}
240-
do {
241-
result = epoll_wait(efd, ev, len, -1);
242-
if (result > 0) {
243-
// Detect timeout, and preserve the epoll_wait API.
244-
if (result == 1 && ev[0].data.fd == timerFd) {
245-
// We assume that timerFD is in ET mode. So we must consume this event to ensure we are notified
246-
// of future timer events because ET mode only notifies a single time until the event is consumed.
247-
uint64_t timerFireCount;
248-
// We don't care what the result is. We just want to consume the wakeup event and reset ET.
249-
result = read(timerFd, &timerFireCount, sizeof(uint64_t));
250-
return 0;
251-
}
252-
return result;
253-
}
254-
} while((err = errno) == EINTR);
217+
return netty_epoll_native_epollWait(env, clazz, efd, address, len, 0);
255218
}
256-
return -err;
219+
// only reschedule the timer if there is a newer event.
220+
// -1 is a special value used by EpollEventLoop.
221+
if (tvSec != ((jint) -1) && tvNsec != ((jint) -1)) {
222+
struct itimerspec ts;
223+
memset(&ts.it_interval, 0, sizeof(struct timespec));
224+
ts.it_value.tv_sec = tvSec;
225+
ts.it_value.tv_nsec = tvNsec;
226+
if (timerfd_settime(timerFd, 0, &ts, NULL) < 0) {
227+
netty_unix_errors_throwChannelExceptionErrorNo(env, "timerfd_settime() failed: ", errno);
228+
return -1;
229+
}
230+
}
231+
return netty_epoll_native_epollWait(env, clazz, efd, address, len, -1);
257232
}
258233

259234
static inline void cpu_relax() {
@@ -524,7 +499,7 @@ static const JNINativeMethod fixed_method_table[] = {
524499
{ "timerFdSetTime", "(III)V", (void *) netty_epoll_native_timerFdSetTime },
525500
{ "epollCreate", "()I", (void *) netty_epoll_native_epollCreate },
526501
{ "epollWait0", "(IJIIII)I", (void *) netty_epoll_native_epollWait0 }, // This method is deprecated!
527-
{ "epollWaitNoTimeout", "(IJIZ)I", (void *) netty_epoll_native_epollWaitNoTimeout },
502+
{ "epollWait", "(IJII)I", (void *) netty_epoll_native_epollWait },
528503
{ "epollBusyWait0", "(IJI)I", (void *) netty_epoll_native_epollBusyWait0 },
529504
{ "epollCtlAdd0", "(III)I", (void *) netty_epoll_native_epollCtlAdd0 },
530505
{ "epollCtlMod0", "(III)I", (void *) netty_epoll_native_epollCtlMod0 },

transport-native-epoll/src/main/java/io/netty/channel/epoll/EpollEventLoop.java

+63-16
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
import java.util.BitSet;
3737
import java.util.Queue;
3838
import java.util.concurrent.Executor;
39-
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
39+
import java.util.concurrent.atomic.AtomicInteger;
4040

4141
import static java.lang.Math.min;
4242

@@ -45,8 +45,6 @@
4545
*/
4646
class EpollEventLoop extends SingleThreadEventLoop {
4747
private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);
48-
private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
49-
AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");
5048

5149
static {
5250
// Ensure JNI is initialized by the time this class is loaded by this time!
@@ -76,8 +74,8 @@ public int get() throws Exception {
7674
return epollWaitNow();
7775
}
7876
};
79-
@SuppressWarnings("unused") // AtomicIntegerFieldUpdater
80-
private volatile int wakenUp;
77+
private final AtomicInteger wakenUp = new AtomicInteger(1);
78+
private boolean pendingWakeup;
8179
private volatile int ioRatio = 50;
8280

8381
// See http://man7.org/linux/man-pages/man2/timerfd_create.2.html.
@@ -180,7 +178,7 @@ NativeDatagramPacketArray cleanDatagramPacketArray() {
180178

181179
@Override
182180
protected void wakeup(boolean inEventLoop) {
183-
if (!inEventLoop && WAKEN_UP_UPDATER.getAndSet(this, 1) == 0) {
181+
if (!inEventLoop && wakenUp.getAndSet(1) == 0) {
184182
// write to the evfd which will then wake-up epoll_wait(...)
185183
Native.eventFdWrite(eventFd.intValue(), 1L);
186184
}
@@ -307,13 +305,18 @@ private int epollWait() throws IOException {
307305
}
308306

309307
private int epollWaitNow() throws IOException {
310-
return Native.epollWait(epollFd, events, timerFd, 0, 0);
308+
return Native.epollWait(epollFd, events, true);
311309
}
312310

313311
private int epollBusyWait() throws IOException {
314312
return Native.epollBusyWait(epollFd, events);
315313
}
316314

315+
private int epollWaitTimeboxed() throws IOException {
316+
// Wait with 1 second "safeguard" timeout
317+
return Native.epollWait(epollFd, events, 1000);
318+
}
319+
317320
@Override
318321
protected void run() {
319322
for (;;) {
@@ -329,11 +332,34 @@ protected void run() {
329332
break;
330333

331334
case SelectStrategy.SELECT:
332-
if (wakenUp == 1) {
333-
wakenUp = 0;
335+
if (pendingWakeup) {
336+
// We are going to be immediately woken so no need to reset wakenUp
337+
// or check for timerfd adjustment.
338+
strategy = epollWaitTimeboxed();
339+
if (strategy != 0) {
340+
break;
341+
}
342+
// We timed out so assume that we missed the write event due to an
343+
// abnormally failed syscall (the write itself or a prior epoll_wait)
344+
logger.warn("Missed eventfd write (not seen after > 1 second)");
345+
pendingWakeup = false;
346+
if (hasTasks()) {
347+
break;
348+
}
349+
// fall-through
334350
}
335-
if (!hasTasks()) {
336-
strategy = epollWait();
351+
352+
wakenUp.set(0);
353+
try {
354+
if (!hasTasks()) {
355+
strategy = epollWait();
356+
}
357+
} finally {
358+
// Try get() first to avoid much more expensive CAS in the case we
359+
// were woken via the wakeup() method (submitted task)
360+
if (wakenUp.get() == 1 || wakenUp.getAndSet(1) == 1) {
361+
pendingWakeup = true;
362+
}
337363
}
338364
// fallthrough
339365
default:
@@ -417,7 +443,9 @@ private void closeAll() {
417443
private void processReady(EpollEventArray events, int ready) {
418444
for (int i = 0; i < ready; i ++) {
419445
final int fd = events.fd(i);
420-
if (fd == eventFd.intValue() || fd == timerFd.intValue()) {
446+
if (fd == eventFd.intValue()) {
447+
pendingWakeup = false;
448+
} else if (fd == timerFd.intValue()) {
421449
// Just ignore as we use ET mode for the eventfd and timerfd.
422450
//
423451
// See also https://stackoverflow.com/a/12492308/1074097
@@ -479,10 +507,23 @@ private void processReady(EpollEventArray events, int ready) {
479507
@Override
480508
protected void cleanup() {
481509
try {
482-
try {
483-
epollFd.close();
484-
} catch (IOException e) {
485-
logger.warn("Failed to close the epoll fd.", e);
510+
// Ensure any in-flight wakeup writes have been performed prior to closing eventFd.
511+
while (pendingWakeup) {
512+
try {
513+
int count = epollWaitTimeboxed();
514+
if (count == 0) {
515+
// We timed-out so assume that the write we're expecting isn't coming
516+
break;
517+
}
518+
for (int i = 0; i < count; i++) {
519+
if (events.fd(i) == eventFd.intValue()) {
520+
pendingWakeup = false;
521+
break;
522+
}
523+
}
524+
} catch (IOException ignore) {
525+
// ignore
526+
}
486527
}
487528
try {
488529
eventFd.close();
@@ -494,6 +535,12 @@ protected void cleanup() {
494535
} catch (IOException e) {
495536
logger.warn("Failed to close the timer fd.", e);
496537
}
538+
539+
try {
540+
epollFd.close();
541+
} catch (IOException e) {
542+
logger.warn("Failed to close the epoll fd.", e);
543+
}
497544
} finally {
498545
// release native memory
499546
if (iovArray != null) {

transport-native-epoll/src/main/java/io/netty/channel/epoll/Native.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,14 @@ public static int epollWait(FileDescriptor epollFd, EpollEventArray events, File
107107
}
108108

109109
static int epollWait(FileDescriptor epollFd, EpollEventArray events, boolean immediatePoll) throws IOException {
110-
int ready = epollWaitNoTimeout(epollFd.intValue(), events.memoryAddress(), events.length(), immediatePoll);
110+
return epollWait(epollFd, events, immediatePoll ? 0 : -1);
111+
}
112+
113+
/**
114+
* This uses epoll's own timeout and does not reset/re-arm any timerfd
115+
*/
116+
static int epollWait(FileDescriptor epollFd, EpollEventArray events, int timeoutMillis) throws IOException {
117+
int ready = epollWait(epollFd.intValue(), events.memoryAddress(), events.length(), timeoutMillis);
111118
if (ready < 0) {
112119
throw newIOException("epoll_wait", ready);
113120
}
@@ -128,7 +135,7 @@ public static int epollBusyWait(FileDescriptor epollFd, EpollEventArray events)
128135
}
129136

130137
private static native int epollWait0(int efd, long address, int len, int timerFd, int timeoutSec, int timeoutNs);
131-
private static native int epollWaitNoTimeout(int efd, long address, int len, boolean immediatePoll);
138+
private static native int epollWait(int efd, long address, int len, int timeout);
132139
private static native int epollBusyWait0(int efd, long address, int len);
133140

134141
public static void epollCtlAdd(int efd, final int fd, final int flags) throws IOException {

0 commit comments

Comments
 (0)