Skip to content

Commit 89d15a8

Browse files
committed
linux: futex v2 API updates
* `futex2_waitv` always takes a 64-bit timespec. Perhaps the `kernel_timespec` should be renamed `timespec64`? It's used in io_uring, too.
* Add `packed struct` for futex v2 flags and parameters.
* Add very basic "tests" for the futex v2 syscalls (just to ensure the code compiles).
* Update the stale or broken comments. (I could also just delete these; they're not really documenting Zig-specific behavior.)

Given that the futex2 APIs are not used by Zig's library (they're a bit too new), that these are very specialized syscalls, and that they currently provide no benefit over the existing v1 API, I wonder if, instead of fixing these up, we should just replace them with a stub that says 'use a 3rd party library'.
1 parent cfe5def commit 89d15a8

File tree

2 files changed

+219
-67
lines changed

2 files changed

+219
-67
lines changed

lib/std/os/linux.zig

Lines changed: 89 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -703,58 +703,57 @@ pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout
703703
return syscall4(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(timeout));
704704
}
705705

706-
/// Given an array of `futex_waitv`, wait on each uaddr.
706+
/// Given an array of `futex2_waitone`, wait on each uaddr.
707707
/// The thread wakes if a futex_wake() is performed at any uaddr.
708-
/// The syscall returns immediately if any waiter has *uaddr != val.
709-
/// timeout is an optional timeout value for the operation.
710-
/// Each waiter has individual flags.
711-
/// The `flags` argument for the syscall should be used solely for specifying
712-
/// the timeout as realtime, if needed.
713-
/// Flags for private futexes, sizes, etc. should be used on the
714-
/// individual flags of each waiter.
708+
/// The syscall returns immediately if any futex has *uaddr != val.
709+
/// timeout is an optional, absolute timeout value for the operation.
710+
/// The `flags` argument is for future use and currently should be `.{}`.
711+
/// Flags for private futexes, sizes, etc. should be set on the
712+
/// individual flags of each `futex2_waitone`.
715713
///
716714
/// Returns the array index of one of the woken futexes.
717715
/// No further information is provided: any number of other futexes may also
718716
/// have been woken by the same event, and if more than one futex was woken,
719717
/// the returned index may refer to any one of them.
720718
/// (It is not necessarily the futex with the smallest index, nor the one
721719
/// most recently woken, nor...)
720+
///
721+
/// Requires at least kernel v5.16.
722722
pub fn futex2_waitv(
723-
/// List of futexes to wait on.
724-
waiters: [*]futex_waitv,
725-
/// Length of `waiters`.
723+
futexes: [*]const futex2_waitone,
724+
/// Length of `futexes`. Max of FUTEX2_WAITONE_MAX.
726725
nr_futexes: u32,
727-
/// Flag for timeout (monotonic/realtime).
728-
flags: u32,
729-
/// Optional absolute timeout.
730-
timeout: ?*const timespec,
726+
flags: FUTEX2_FLAGS_WAITV,
727+
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
728+
timeout: ?*const kernel_timespec,
731729
/// Clock to be used for the timeout, realtime or monotonic.
732730
clockid: clockid_t,
733731
) usize {
734732
return syscall5(
735733
.futex_waitv,
736-
@intFromPtr(waiters),
734+
@intFromPtr(futexes),
737735
nr_futexes,
738-
flags,
736+
@as(u32, @bitCast(flags)),
739737
@intFromPtr(timeout),
740-
@bitCast(@as(isize, @intFromEnum(clockid))),
738+
@intFromEnum(clockid),
741739
);
742740
}
743741

744-
/// Wait on a futex.
745-
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
746-
/// futex2 familiy of calls.
742+
/// Wait on a single futex.
743+
/// Identical to the futex v1 `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
744+
/// futex2 family of calls.
745+
///
746+
/// Requires at least kernel v6.7.
747747
pub fn futex2_wait(
748748
/// Address of the futex to wait on.
749749
uaddr: *const anyopaque,
750750
/// Value of `uaddr`.
751751
val: usize,
752-
/// Bitmask.
752+
/// Bitmask to match against incoming wakeup masks. Must not be zero.
753753
mask: usize,
754-
/// `FUTEX2` flags.
755-
flags: u32,
756-
/// Optional absolute timeout.
757-
timeout: ?*const timespec,
754+
flags: FUTEX2_FLAGS,
755+
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
756+
timeout: ?*const kernel_timespec,
758757
/// Clock to be used for the timeout, realtime or monotonic.
759758
clockid: clockid_t,
760759
) usize {
@@ -763,52 +762,55 @@ pub fn futex2_wait(
763762
@intFromPtr(uaddr),
764763
val,
765764
mask,
766-
flags,
765+
@as(u32, @bitCast(flags)),
767766
@intFromPtr(timeout),
768-
@bitCast(@as(isize, @intFromEnum(clockid))),
767+
@intFromEnum(clockid),
769768
);
770769
}
771770

772-
/// Wake a number of futexes.
773-
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
771+
/// Wake (a subset of) the waiters on the given futex.
772+
/// Identical to the traditional `FUTEX.FUTEX_WAKE_BITSET` op, except it is part of the
774773
/// futex2 family of calls.
774+
///
775+
/// Requires at least kernel v6.7.
775776
pub fn futex2_wake(
776-
/// Address of the futex(es) to wake.
777+
/// Futex to wake
777778
uaddr: *const anyopaque,
778-
/// Bitmask
779+
/// Bitmask to match against waiters.
779780
mask: usize,
780-
/// Number of the futexes to wake.
781-
nr: i32,
782-
/// `FUTEX2` flags.
783-
flags: u32,
781+
/// Maximum number of waiters on the futex to wake.
782+
nr_wake: i32,
783+
flags: FUTEX2_FLAGS,
784784
) usize {
785785
return syscall4(
786786
.futex_wake,
787787
@intFromPtr(uaddr),
788788
mask,
789-
@bitCast(@as(isize, nr)),
790-
flags,
789+
@as(u32, @bitCast(nr_wake)),
790+
@as(u32, @bitCast(flags)),
791791
);
792792
}
793793

794-
/// Requeue a waiter from one futex to another.
794+
/// Wake and/or requeue waiter(s) from one futex to another.
795795
/// Identical to `FUTEX.CMP_REQUEUE`, except it is part of the futex2 family of calls.
796+
///
797+
/// Requires at least kernel v6.7.
796798
pub fn futex2_requeue(
797-
/// Array describing the source and destination futex.
798-
waiters: [*]futex_waitv,
799-
/// Unused.
800-
flags: u32,
801-
/// Number of futexes to wake.
799+
/// The source and destination futexes. Must be a 2-element array.
800+
waiters: [*]const futex2_waitone,
801+
/// Currently unused.
802+
flags: FUTEX2_FLAGS_REQUEUE,
803+
/// Maximum number of waiters to wake on the source futex.
802804
nr_wake: i32,
803-
/// Number of futexes to requeue.
805+
/// Maximum number of waiters to transfer to the destination futex.
804806
nr_requeue: i32,
805807
) usize {
806808
return syscall4(
807809
.futex_requeue,
808810
@intFromPtr(waiters),
809-
flags,
810-
@bitCast(@as(isize, nr_wake)),
811-
@bitCast(@as(isize, nr_requeue)),
811+
@as(u32, @bitCast(flags)),
812+
@as(u32, @bitCast(nr_wake)),
813+
@as(u32, @bitCast(nr_requeue)),
812814
);
813815
}
814816

@@ -3407,16 +3409,6 @@ pub const FALLOC = struct {
34073409
pub const FL_UNSHARE_RANGE = 0x40;
34083410
};
34093411

3410-
pub const FUTEX2 = struct {
3411-
pub const SIZE_U8 = 0x00;
3412-
pub const SIZE_U16 = 0x01;
3413-
pub const SIZE_U32 = 0x02;
3414-
pub const SIZE_U64 = 0x03;
3415-
pub const NUMA = 0x04;
3416-
3417-
pub const PRIVATE = FUTEX.PRIVATE_FLAG;
3418-
};
3419-
34203412
// Futex v1 API commands. See futex man page for each command's
34213413
// interpretation of the futex arguments.
34223414
pub const FUTEX_COMMAND = enum(u7) {
@@ -3477,8 +3469,38 @@ pub const FUTEX_WAKE_OP_CMP = enum(u4) {
34773469
GE = 5,
34783470
};
34793471

3480-
/// Max numbers of elements in a `futex_waitv` array.
3481-
pub const FUTEX2_WAITV_MAX = 128;
3472+
/// Max number of elements in a `futex2_waitone` array.
3473+
pub const FUTEX2_WAITONE_MAX = 128;
3474+
3475+
/// For the futex v2 API, the size of the futex at the uaddr. v1 futexes are
3476+
/// always implicitly U32. As of kernel v6.14, only U32 is implemented
3477+
/// for v2 futexes.
3478+
pub const FUTEX2_SIZE = enum(u2) {
3479+
U8 = 0,
3480+
U16 = 1,
3481+
U32 = 2,
3482+
U64 = 3,
3483+
};
3484+
3485+
/// As of kernel 6.14 there are no defined flags to futex2_waitv.
3486+
pub const FUTEX2_FLAGS_WAITV = packed struct(u32) {
3487+
_reserved: u32 = 0,
3488+
};
3489+
3490+
/// As of kernel 6.14 there are no defined flags to futex2_requeue.
3491+
pub const FUTEX2_FLAGS_REQUEUE = packed struct(u32) {
3492+
_reserved: u32 = 0,
3493+
};
3494+
3495+
/// Flags for the futex v2 APIs: passed directly to futex2_wait and futex2_wake, and set
3496+
/// per futex in the futex2_waitone struct (the futex2_waitv and futex2_requeue `flags` arguments use their own types).
3497+
pub const FUTEX2_FLAGS = packed struct(u32) {
3498+
size: FUTEX2_SIZE,
3499+
numa: bool = false,
3500+
_reserved: u4 = 0,
3501+
private: bool,
3502+
_undefined: u24 = 0,
3503+
};
34823504

34833505
pub const PROT = struct {
34843506
/// page can not be accessed
@@ -9343,17 +9365,17 @@ pub const PTRACE = struct {
93439365
pub const GET_SYSCALL_INFO = 0x420e;
93449366
};
93459367

9346-
/// A waiter for vectorized wait.
9347-
pub const futex_waitv = extern struct {
9348-
// Expected value at uaddr
9368+
/// For futex2_waitv and futex2_requeue. Arrays of `futex2_waitone` allow
9369+
/// waiting on multiple futexes in one call.
9370+
pub const futex2_waitone = extern struct {
9371+
/// Expected value at uaddr, should match size of futex.
93499372
val: u64,
9350-
/// User address to wait on.
9373+
/// User address to wait on. Top-bits must be 0 on 32-bit.
93519374
uaddr: u64,
93529375
/// Flags for this waiter.
9353-
flags: u32,
9376+
flags: FUTEX2_FLAGS,
93549377
/// Reserved member to preserve alignment.
9355-
/// Should be 0.
9356-
__reserved: u32,
9378+
__reserved: u32 = 0,
93579379
};
93589380

93599381
pub const cache_stat_range = extern struct {

lib/std/os/linux/test.zig

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,136 @@ test "futex v1" {
297297
}
298298
}
299299

300+
comptime {
301+
std.debug.assert(2 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = false })));
302+
std.debug.assert(128 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = @enumFromInt(0), .private = true })));
303+
}
304+
305+
test "futex2_waitv" {
306+
const locks = [_]std.atomic.Value(u32){
307+
std.atomic.Value(u32).init(1),
308+
std.atomic.Value(u32).init(1),
309+
std.atomic.Value(u32).init(1),
310+
};
311+
312+
const futexes = [_]linux.futex2_waitone{
313+
.{
314+
.val = 1,
315+
.uaddr = @intFromPtr(&locks[0].raw),
316+
.flags = .{ .size = .U32, .private = true },
317+
},
318+
.{
319+
.val = 1,
320+
.uaddr = @intFromPtr(&locks[1].raw),
321+
.flags = .{ .size = .U32, .private = true },
322+
},
323+
.{
324+
.val = 1,
325+
.uaddr = @intFromPtr(&locks[2].raw),
326+
.flags = .{ .size = .U32, .private = true },
327+
},
328+
};
329+
330+
const timeout = linux.kernel_timespec{ .sec = 0, .nsec = 2 }; // absolute timeout, so this is 1970...
331+
const rc = linux.futex2_waitv(&futexes, futexes.len, .{}, &timeout, .MONOTONIC);
332+
switch (linux.E.init(rc)) {
333+
.NOSYS => return error.SkipZigTest, // futex2_waitv added in kernel v5.16
334+
else => |err| try expectEqual(.TIMEDOUT, err),
335+
}
336+
}
337+
338+
// Futex v2 API is only supported on recent kernels (v6.7), so skip tests if the syscalls
339+
// return ENOSYS.
340+
fn futex2_skip_if_unsupported() !void {
341+
const lock: u32 = 0;
342+
const rc = linux.futex2_wake(&lock, 0, 1, .{ .size = .U32, .private = true });
343+
if (linux.E.init(rc) == .NOSYS) {
344+
return error.SkipZigTest;
345+
}
346+
}
347+
348+
test "futex2_wait" {
349+
var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
350+
var rc: usize = 0;
351+
const mask = 0x1;
352+
353+
try futex2_skip_if_unsupported();
354+
355+
// The API for 8,16,64 bit futexes is defined, but as of kernel v6.14
356+
// (at least) they're not implemented.
357+
if (false) {
358+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U8, .private = true }, null, .MONOTONIC);
359+
try expectEqual(.INVAL, linux.E.init(rc));
360+
361+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U16, .private = true }, null, .MONOTONIC);
362+
try expectEqual(.INVAL, linux.E.init(rc));
363+
364+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U64, .private = true }, null, .MONOTONIC);
365+
try expectEqual(.INVAL, linux.E.init(rc));
366+
}
367+
368+
const flags = linux.FUTEX2_FLAGS{ .size = .U32, .private = true };
369+
// no-wait, lock state mismatch
370+
rc = linux.futex2_wait(&lock.raw, 2, mask, flags, null, .MONOTONIC);
371+
try expectEqual(.AGAIN, linux.E.init(rc));
372+
373+
// hit timeout on wait
374+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .MONOTONIC);
375+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
376+
377+
// timeout is absolute
378+
{
379+
var curr: linux.timespec = undefined;
380+
rc = linux.clock_gettime(.MONOTONIC, &curr); // gettime() uses platform timespec
381+
try expectEqual(0, rc);
382+
383+
// ... but futex2_wait always uses 64-bit timespec
384+
var timeout: linux.kernel_timespec = .{
385+
.sec = curr.sec,
386+
.nsec = curr.nsec + 2,
387+
};
388+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &timeout, .MONOTONIC);
389+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
390+
}
391+
392+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .REALTIME);
393+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
394+
}
395+
396+
test "futex2_wake" {
397+
var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
398+
399+
try futex2_skip_if_unsupported();
400+
401+
const rc = linux.futex2_wake(&lock.raw, 0xFF, 1, .{ .size = .U32, .private = true });
402+
try expectEqual(0, rc);
403+
}
404+
405+
test "futex2_requeue" {
406+
try futex2_skip_if_unsupported();
407+
408+
const locks = [_]std.atomic.Value(u32){
409+
std.atomic.Value(u32).init(1),
410+
std.atomic.Value(u32).init(1),
411+
};
412+
413+
const futexes = [_]linux.futex2_waitone{
414+
.{
415+
.val = 1,
416+
.uaddr = @intFromPtr(&locks[0].raw),
417+
.flags = .{ .size = .U32, .private = true },
418+
},
419+
.{
420+
.val = 1,
421+
.uaddr = @intFromPtr(&locks[1].raw),
422+
.flags = .{ .size = .U32, .private = true },
423+
},
424+
};
425+
426+
const rc = linux.futex2_requeue(&futexes, .{}, 2, 2);
427+
try expectEqual(0, rc);
428+
}
429+
300430
test {
301431
_ = linux.IoUring;
302432
}

0 commit comments

Comments
 (0)