Skip to content

teach LLVM to optimize across async functions #4974

Open
@andrewrk

Description

@andrewrk

Here's a nice simple example:

https://godbolt.org/z/Ca9_Wd

// Global frame handles. bar() stores its own frame in frame1 and baz() stores
// its own frame in frame2 (via @frame()); test1 later resumes both.
var frame1: anyframe = undefined;
var frame2: anyframe = undefined;

// Async variant of the experiment: starts foo(a, b) with `async`, resumes the
// frames recorded in frame1 and then frame2, and finally reads foo's result
// with `noasync await`.
// Per the issue text, with --single-threaded this is expected to behave
// exactly like test2 (i.e. return a + b).
export fn test1(a: i32, b: i32) i32 {
    var f = async foo(a, b);
    // Resume order matters: frame1 is written by bar, frame2 by baz, and foo
    // calls bar before baz.
    resume frame1;
    resume frame2;
    return noasync await f;
}

// Returns bar(a, b) + baz(a, b). Both callees contain a `suspend`, so this
// function is presumably async as well (NOTE(review): inferred; confirm
// against the Zig async rules for the compiler version used).
fn foo(a: i32, b: i32) i32 {
    return bar(a, b) + baz(a, b);
}

// Publishes its own frame in the global frame1, suspends, and returns `a`
// once resumed. `b` is not used.
fn bar(a: i32, b: i32) i32 {
    frame1 = @frame();
    suspend;
    return a;
}

// Publishes its own frame in the global frame2, suspends, and returns `b`
// once resumed. `a` is not used.
fn baz(a: i32, b: i32) i32 {
     frame2 = @frame();
    suspend;
    return b;   
}

// Reference implementation: plain synchronous a + b. The issue uses its
// machine code as the target that test1 should ideally also compile to.
export fn test2(a: i32, b: i32) i32 {
    return a + b;
}

With --single-threaded, test1 and test2 have identical semantics, and should produce identical machine code. Instead:

# test1: compiler output for the Zig function test1(a, b); a arrives in edi, b in esi.
# NOTE(review): the frame-field meanings given below (offset 0 = resume function,
# 8 = resume state, 16 = awaiter, 24 = result pointer, 32 = optional second result
# pointer, 44/48 = copies of a/b) are inferred from how this listing uses each
# offset, not from a documented layout. Confirm against the Zig compiler.
test1:
        # Reserve 168 bytes of stack. The region at rsp is handed to foo as its
        # frame further down (rdi = rsp before the call).
        sub     rsp, 168
        lea     rax, [rsp + 40]
        # Offset 0 of the frame receives the address of foo; rcx keeps the same
        # address for the indirect call below.
        mov     qword ptr [rsp], offset foo
        mov     ecx, offset foo
        # Offset 24 = rsp + 40; the return value is loaded through this pointer
        # at the end of the function.
        mov     qword ptr [rsp + 24], rax
        # Copies of a and b at offsets 44/48, the offsets foo reads them from.
        mov     dword ptr [rsp + 44], edi
        mov     dword ptr [rsp + 48], esi
        # rax = rsp + 64: nested frame whose function slot is set to bar below.
        # The following stores match what foo's initial path writes
        # (offsets 128/132, 96, 64, 88, 108/112), so that part of foo appears
        # to have been inlined here.
        lea     rax, [rsp + 64]
        mov     dword ptr [rsp + 128], edi
        mov     dword ptr [rsp + 132], esi
        lea     rdx, [rsp + 104]
        mov     qword ptr [rsp + 96], 0
        mov     qword ptr [rsp + 64], offset bar
        mov     qword ptr [rsp + 88], rdx
        mov     dword ptr [rsp + 108], edi
        mov     dword ptr [rsp + 112], esi
        # One 16-byte store covering offsets 8..23: offset 8 = 2 and offset 16 = 0
        # (vmovq zero-extends rdx into xmm0).
        mov     edx, 2
        vmovq   xmm0, rdx
        vmovdqu xmmword ptr [rsp + 8], xmm0
        # frame1 = address of the nested frame (rsp + 64); its offset 8 is set to 2.
        mov     qword ptr [rip + frame1], rax
        mov     qword ptr [rsp + 72], 2
        # The next stores mirror bar's resume path: the value a is written to
        # the nested frame's result slot (rsp + 104), and the nested frame's
        # offset 16 receives the bitwise NOT of the outer frame address.
        # This looks like `resume frame1` inlined - TODO confirm.
        mov     dword ptr [rsp + 104], edi
        mov     rdi, rsp
        mov     rax, rdi
        not     rax
        mov     qword ptr [rsp + 80], rax
        # Call foo (address in rcx) with rdi = frame at rsp and rsi = -2.
        mov     rsi, -2
        call    rcx
        # resume frame2: load the frame pointer from the global and call the
        # function stored at its offset 0, with rsi = -1.
        mov     rdi, qword ptr [rip + frame2]
        mov     rsi, -1
        call    qword ptr [rdi]
        # Return value: dereference the pointer stored at offset 24 (rsp + 40).
        mov     rax, qword ptr [rsp + 24]
        mov     eax, dword ptr [rax]
        add     rsp, 168
        ret

# foo: compiler output for the Zig function foo. rdi points at foo's frame.
# The qword at offset 8 selects which part of the body runs: 3 jumps to
# .LBB1_6, 2 jumps to .LBB1_5, anything else falls through to the initial path.
# NOTE(review): field meanings (offset 16 = awaiter, 24 = result pointer,
# 32 = optional second result pointer) are inferred from usage in this listing.
foo:
        mov     rax, qword ptr [rdi + 8]
        cmp     rax, 3
        je      .LBB1_6
        # r8 = address of the nested frame at offset 64; used by both remaining paths.
        lea     r8, [rdi + 64]
        cmp     rax, 2
        je      .LBB1_5
        # Initial path: load a/b from offsets 44/48, keep copies at 128/132,
        # and prepare the nested frame to run bar.
        mov     ecx, dword ptr [rdi + 44]
        mov     edx, dword ptr [rdi + 48]
        mov     dword ptr [rdi + 128], ecx
        mov     dword ptr [rdi + 132], edx
        lea     r9, [rdi + 104]
        mov     qword ptr [rdi + 96], 0
        mov     qword ptr [rdi + 64], offset bar
        # rax = address of the global to publish the nested frame in;
        # esi = value to store at this frame's offset 8 in the shared tail.
        mov     eax, offset frame1
        mov     esi, 2
        jmp     .LBB1_3
.LBB1_6:
        # Offset 8 was 3: add the value at offset 104 to the value saved at
        # offset 136 and store the sum through the pointer held at offset 24.
        mov     rcx, qword ptr [rdi + 24]
        mov     eax, dword ptr [rdi + 104]
        mov     dword ptr [rdi + 148], eax
        add     eax, dword ptr [rdi + 136]
        mov     dword ptr [rdi + 152], eax
        mov     dword ptr [rcx], eax
        # Read offset 16 and replace it with its bitwise NOT. If the old value
        # was zero, just return.
        mov     rax, qword ptr [rdi + 16]
        mov     rdx, rax
        not     rdx
        mov     qword ptr [rdi + 16], rdx
        test    rax, rax
        je      .LBB1_4
        # If offset 32 holds a non-null pointer, copy the result there as well.
        mov     rdx, qword ptr [rdi + 32]
        test    rdx, rdx
        je      .LBB1_9
        mov     ecx, dword ptr [rcx]
        mov     dword ptr [rdx], ecx
.LBB1_9:
        # Tail-jump to the function stored at offset 0 of the frame that was in
        # offset 16, passing that frame in rdi and -2 in rsi.
        mov     rcx, qword ptr [rax]
        mov     rdi, rax
        mov     rsi, -2
        jmp     rcx
.LBB1_5:
        # Offset 8 was 2: save the value at offset 104 into offset 136, then
        # prepare the nested frame to run baz (copies of a/b kept at 140/144).
        lea     r9, [rdi + 104]
        mov     eax, dword ptr [rdi + 104]
        mov     dword ptr [rdi + 136], eax
        mov     ecx, dword ptr [rdi + 44]
        mov     edx, dword ptr [rdi + 48]
        mov     dword ptr [rdi + 140], ecx
        mov     dword ptr [rdi + 144], edx
        mov     qword ptr [rdi + 96], 0
        mov     qword ptr [rdi + 64], offset baz
        mov     eax, offset frame2
        mov     esi, 3
.LBB1_3:
        # Shared tail for both set-up paths. Nested frame (base rdi + 64):
        # offset 16 = this frame, offset 24 = rdi + 104, offsets 44/48 = a/b,
        # offset 8 = 2. This frame's offset 8 = rsi, and the global selected in
        # rax (frame1 or frame2) receives the nested frame's address.
        mov     qword ptr [rdi + 80], rdi
        mov     qword ptr [rdi + 88], r9
        mov     dword ptr [rdi + 108], ecx
        mov     dword ptr [rdi + 112], edx
        mov     qword ptr [rdi + 8], rsi
        mov     qword ptr [rax], r8
        mov     qword ptr [rdi + 72], 2
.LBB1_4:
        ret

# test2: compiler output for the synchronous reference function.
# Computes edi + esi (a + b) into eax with a single lea and returns.
test2:
        lea     eax, [rdi + rsi]
        ret

# bar: compiler output for the Zig function bar. rdi points at bar's frame.
# Offset 8 == 0 selects the first entry (.LBB3_5); any other value selects the
# resume path that follows the branch.
# NOTE(review): field meanings (offset 16 = awaiter, 24 = result pointer,
# 32 = optional second result pointer, 44 = a) are inferred from usage here.
bar:
        cmp     qword ptr [rdi + 8], 0
        je      .LBB3_5
        # Resume path: store the value at offset 44 through the pointer held at
        # offset 24.
        mov     rcx, qword ptr [rdi + 24]
        mov     eax, dword ptr [rdi + 44]
        mov     dword ptr [rcx], eax
        # Read offset 16 and replace it with its bitwise NOT. If the old value
        # was zero, just return.
        mov     rax, qword ptr [rdi + 16]
        mov     rdx, rax
        not     rdx
        mov     qword ptr [rdi + 16], rdx
        test    rax, rax
        je      .LBB3_6
        # If offset 32 holds a non-null pointer, copy the result there as well.
        mov     rdx, qword ptr [rdi + 32]
        test    rdx, rdx
        je      .LBB3_4
        mov     ecx, dword ptr [rcx]
        mov     dword ptr [rdx], ecx
.LBB3_4:
        # Tail-jump to the function stored at offset 0 of the frame that was in
        # offset 16, passing that frame in rdi and -2 in rsi.
        mov     rcx, qword ptr [rax]
        mov     rdi, rax
        mov     rsi, -2
        jmp     rcx
.LBB3_5:
        # First entry: publish this frame in the global frame1, set offset 8 to
        # 2, and return to the caller (the `suspend` in the Zig source).
        mov     qword ptr [rip + frame1], rdi
        mov     qword ptr [rdi + 8], 2
.LBB3_6:
        ret

# baz: compiler output for the Zig function baz. rdi points at baz's frame.
# Same shape as bar, except that the returned value is read from offset 48
# and the frame is published in frame2.
# NOTE(review): field meanings (offset 16 = awaiter, 24 = result pointer,
# 32 = optional second result pointer, 48 = b) are inferred from usage here.
baz:
        cmp     qword ptr [rdi + 8], 0
        je      .LBB4_5
        # Resume path: store the value at offset 48 through the pointer held at
        # offset 24.
        mov     rcx, qword ptr [rdi + 24]
        mov     eax, dword ptr [rdi + 48]
        mov     dword ptr [rcx], eax
        # Read offset 16 and replace it with its bitwise NOT. If the old value
        # was zero, just return.
        mov     rax, qword ptr [rdi + 16]
        mov     rdx, rax
        not     rdx
        mov     qword ptr [rdi + 16], rdx
        test    rax, rax
        je      .LBB4_6
        # If offset 32 holds a non-null pointer, copy the result there as well.
        mov     rdx, qword ptr [rdi + 32]
        test    rdx, rdx
        je      .LBB4_4
        mov     ecx, dword ptr [rcx]
        mov     dword ptr [rdx], ecx
.LBB4_4:
        # Tail-jump to the function stored at offset 0 of the frame that was in
        # offset 16, passing that frame in rdi and -2 in rsi.
        mov     rcx, qword ptr [rax]
        mov     rdi, rax
        mov     rsi, -2
        jmp     rcx
.LBB4_5:
        # First entry: publish this frame in the global frame2, set offset 8 to
        # 2, and return to the caller (the `suspend` in the Zig source).
        mov     qword ptr [rip + frame2], rdi
        mov     qword ptr [rdi + 8], 2
.LBB4_6:
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    contributor friendly: This issue is limited in scope and/or knowledge of Zig internals.
    optimization
    upstream: An issue with a third party project that Zig uses.

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions