Skip to content

sysbox-fs fuse-related deadlock #1002

@okhowang

Description

@okhowang

kernel version 6.6
sysbox version 0.6.7
docker version 28.5.1

Every docker run --runtime sysbox-runc command gets stuck; the sysbox-runc init processes stay in uninterruptible sleep (state D):

      1 3985280 3985280    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id 20a69783071597167d651dc3fa0ab9cb584d38f9c107ae79d759488b69594306 -address /run
3985280 3985290 3985280    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/20a69783
3985290 3985321 3985321 3985321 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init
      1 3986699 3986699    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id 1130aa43e4bd489b7b2c869761564025ef8d2c916859283d401fabf0d1e2c028 -address /run
3986699 3986714 3986699    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/1130aa43
3986714 3986767 3986767 3986767 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init
      1 3986821 3986821    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id cc86c620e705498d46091820904c7dc8eafd9ae2fed3d90a4ffd44073a9fa13b -address /run
3986821 3986831 3986821    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/cc86c620
3986831 3986859 3986859 3986859 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init

I found that sysbox-fs is blocked on its nsenter child process, which is also in state D:

      1    8940    8940    8940 ?             -1 Ssl      0 248:07 /usr/bin/sysbox-fs
   8940 3942647 3942647 3942647 ?             -1 Ds   100000   0:00  \_ /usr/bin/sysbox-fs nsenter

The nsenter process is blocked in the kernel, waiting for the answer to a FUSE lookup request issued during an openat call:

crash> bt 3942647
PID: 3942647  TASK: ffff91fb91b70000  CPU: 28   COMMAND: "runc:[2:INIT]"
 #0 [ffffa5c9724579b8] __schedule at ffffffff8fcccabf
 #1 [ffffa5c972457a20] schedule at ffffffff8fcccf43
 #2 [ffffa5c972457a38] request_wait_answer at ffffffffc088cef0 [fuse]
 #3 [ffffa5c972457a90] fuse_simple_request at ffffffffc088e45c [fuse]
 #4 [ffffa5c972457ac0] fuse_lookup_name at ffffffffc0890e18 [fuse]
 #5 [ffffa5c972457b98] fuse_lookup at ffffffffc0890fe6 [fuse]
 #6 [ffffa5c972457c68] __lookup_slow at ffffffff8f449cc5
 #7 [ffffa5c972457cc0] walk_component at ffffffff8f44b64b
 #8 [ffffa5c972457cf0] link_path_walk at ffffffff8f44c336
 #9 [ffffa5c972457d50] path_openat at ffffffff8f44ef27
#10 [ffffa5c972457da8] do_filp_open at ffffffff8f451014
#11 [ffffa5c972457ec8] do_sys_openat2 at ffffffff8f437101
#12 [ffffa5c972457f10] __x64_sys_openat at ffffffff8f4375ba
#13 [ffffa5c972457f38] do_syscall_64 at ffffffff8fcc1f5f
#14 [ffffa5c972457f50] entry_SYSCALL_64_after_hwframe at ffffffff8fe00130
    RIP: 000000000040e56e  RSP: 000000c0004fcbf0  RFLAGS: 00000212
    RAX: ffffffffffffffda  RBX: ffffffffffffff9c  RCX: 000000000040e56e
    RDX: 0000000000080000  RSI: 000000c000351400  RDI: ffffffffffffff9c
    RBP: 000000c0004fcc30   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000212  R12: 000000c000351400
    R13: 0000000000000100  R14: 000000c000002380  R15: 000000c0004d4a50
    ORIG_RAX: 0000000000000101  CS: 0033  SS: 002b

Meanwhile, sysbox-fs appears to be in the middle of an nsenter request for an overlay mount syscall, blocked reading the nsenter response:

(dlv) bt
 0  0x000000000040e56e in internal/runtime/syscall.Syscall6
    at internal/runtime/syscall/asm_linux_amd64.s:36
 1  0x00000000004dacad in syscall.RawSyscall6
    at syscall/syscall_linux.go:64
 2  0x00000000004dad06 in syscall.Syscall
    at syscall/syscall_linux.go:86
 3  0x00000000004d8f18 in syscall.read
    at syscall/zsyscall_linux_amd64.go:736
 4  0x00000000004fa54e in syscall.Read
    at syscall/syscall_unix.go:183
 5  0x00000000004fa54e in internal/poll.ignoringEINTRIO
    at internal/poll/fd_unix.go:745
 6  0x00000000004fa54e in internal/poll.(*FD).Read
    at internal/poll/fd_unix.go:161
 7  0x000000000050610f in os.(*File).read
    at os/file_posix.go:29
 8  0x000000000050610f in os.(*File).Read
    at os/file.go:124
 9  0x0000000000569968 in encoding/json.(*Decoder).refill
    at encoding/json/stream.go:165
10  0x0000000000569565 in encoding/json.(*Decoder).readValue
    at encoding/json/stream.go:140
11  0x0000000000569315 in encoding/json.(*Decoder).Decode
    at encoding/json/stream.go:63
12  0x000000000092c0ee in github.com/nestybox/sysbox-fs/nsenter.(*NSenterEvent).processResponse
    at github.com/nestybox/sysbox-fs/nsenter/event.go:153
13  0x000000000092f045 in github.com/nestybox/sysbox-fs/nsenter.(*NSenterEvent).SendRequest
    at github.com/nestybox/sysbox-fs/nsenter/event.go:657
14  0x00000000009363c2 in github.com/nestybox/sysbox-fs/nsenter.(*nsenterService).SendRequestEvent
    at github.com/nestybox/sysbox-fs/nsenter/eventService.go:66
15  0x000000000093cef3 in github.com/nestybox/sysbox-fs/seccomp.(*mountSyscallInfo).processOverlayMount
    at github.com/nestybox/sysbox-fs/seccomp/mount.go:474
16  0x000000000093adc5 in github.com/nestybox/sysbox-fs/seccomp.(*mountSyscallInfo).process
    at github.com/nestybox/sysbox-fs/seccomp/mount.go:77
17  0x00000000009496ff in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).processMount
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:663
18  0x0000000000948512 in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).processSyscall
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:502
19  0x00000000009482b0 in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).process
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:467
20  0x00000000009481ef in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).connHandler.gowrap1
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:448
21  0x00000000004824e1 in runtime.goexit
    at runtime/asm_amd64.s:1700
(dlv) p e.ReqMsg
("*github.com/nestybox/sysbox-fs/domain.NSenterMessage")(0xc00178db80)
*github.com/nestybox/sysbox-fs/domain.NSenterMessage {
        Type: "mountSyscallRequest",
        Payload: interface {}(*[]*github.com/nestybox/sysbox-fs/domain.MountSyscallPayload) *[
                *(*"github.com/nestybox/sysbox-fs/domain.MountSyscallPayload")(0xc001dab580),
        ],}
(dlv) p *(*"github.com/nestybox/sysbox-fs/domain.MountSyscallPayload")(0xc001dab580)
github.com/nestybox/sysbox-fs/domain.MountSyscallPayload {
        Header: github.com/nestybox/sysbox-fs/domain.NSenterMsgHeader {
                Pid: 3869668,
                Uid: 100000,
                Gid: 100000,
                Root: "/",
                Cwd: "/",
                Capabilities: [2]uint32 [4294967295,511],},
        Mount: github.com/nestybox/sysbox-fs/domain.Mount {
                Source: "overlay",
                Target: "/var/lib/docker/buildkit/containerd-overlayfs/cachemounts/buildk...+12 more",
                FsType: "overlay",
                Flags: 0,
                Data: "userxattr,index=off,workdir=/var/lib/docker/containerd/daemon/io...+690 more",},}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions