Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Computed goto codegen is inconsistent on aarch64. #14444

Closed
fubark opened this issue Jan 24, 2023 · 2 comments
Closed

Computed goto codegen is inconsistent on aarch64. #14444

fubark opened this issue Jan 24, 2023 · 2 comments
Labels
arch-aarch64 64-bit ARM optimization upstream An issue with a third party project that Zig uses.
Milestone

Comments

@fubark
Copy link
Contributor

fubark commented Jan 24, 2023

Zig Version

0.11.0-dev.1436+59d9afcb5

Steps to Reproduce and Observed Behavior

On Compiler Explorer, compiling the following code with the arguments `-O ReleaseFast -target aarch64-macos.12-none`:

// Externally defined functions: only the prototypes are declared here, so the
// bodies are not visible in this file. Each one is called from exactly one
// case of the switch in `eval`.
// NOTE(review): presumably these exist only to give every case a distinct,
// non-removable side effect — confirm with the reporter.
extern fn foo() void;
extern fn foo2() void;
extern fn foo3() void;

/// Opcodes understood by `eval`. No explicit tag values are given, so the
/// members are numbered in declaration order starting at 0.
const Op = enum {
    // Handled by calling `foo`.
    add,
    // Handled by calling `foo2`.
    sub,
    // Handled by calling `foo3`.
    mul,
    // Makes `eval` return.
    ret,
};

/// Minimal bytecode-interpreter loop used as the codegen reproducer.
///
/// Reads one byte at `insts[pc]`, converts it to an `Op`, and dispatches on it
/// in a `while (true)` + `switch` loop. Each non-returning case calls one
/// extern function, advances `pc`, and `continue`s to the next dispatch.
///
/// `insts` is a many-item pointer, so no length is available here: the loop
/// only terminates when a `.ret` opcode is read.
export fn eval(insts: [*]const u8) void {
    // Index of the next opcode byte to decode.
    var pc: usize = 0;
    while (true) {
        // Dispatch point: this switch is what the backend lowers to a jump table.
        switch (@intToEnum(Op, insts[pc])) {
            .add => {
                foo();
                pc += 1;
                continue;
            },
            .sub => {
                foo2();
                pc += 2;
                continue;
            },
            .mul => {
                foo3();
                pc += 3;
                continue;
            },
            .ret => {
                // Only exit from the loop.
                return;
            },
        }
    }
}

Generates the following:

_eval:
        stp     x22, x21, [sp, #-48]!
        stp     x20, x19, [sp, #16]
        stp     x29, x30, [sp, #32]
        mov     x19, x0
        mov     x20, xzr
        adrp    x21, LJTI0_0@PAGE
        add     x21, x21, LJTI0_0@PAGEOFF
        b       LBB0_2
LBB0_1:
        bl      _foo3
        mov     w8, #3
        add     x20, x20, x8
LBB0_2:
        ldrb    w8, [x19, x20]
        and     x8, x8, #0x3
        adr     x9, LBB0_1
        ldrb    w10, [x21, x8]
        add     x9, x9, x10, lsl #2
        br      x9
LBB0_3:
        bl      _foo
        mov     w8, #1
        add     x20, x20, x8
        b       LBB0_2
LBB0_4:
        bl      _foo2
        mov     w8, #2
        add     x20, x20, x8
        b       LBB0_2
LBB0_5:
        ldp     x29, x30, [sp, #32]
        ldp     x20, x19, [sp, #16]
        ldp     x22, x21, [sp], #48
        ret
LJTI0_0:
        .byte   (LBB0_3-LBB0_1)>>2
        .byte   (LBB0_4-LBB0_1)>>2
        .byte   (LBB0_1-LBB0_1)>>2
        .byte   (LBB0_5-LBB0_1)>>2

Expected Behavior

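I expected the codegen to be similar to x86_64, which duplicates the dispatch sequence (load the next opcode, mask it, and jump indirectly through the table) at the end of every switch case. On aarch64, each case instead branches back to a single shared dispatch block (`LBB0_2`), which then does the table lookup and the indirect jump, so every instruction costs an extra direct branch and all dispatches go through one indirect branch site. Here is the x86_64 output for comparison: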

eval:
        push    r14
        push    rbx
        push    rax
        mov     r14, rdi
        xor     ebx, ebx
        movzx   eax, byte ptr [rdi + rbx]
        and     eax, 3
        jmp     qword ptr [8*rax + .LJTI0_0]
.LBB0_1:
        call    foo@PLT
        mov     eax, 1
        add     rbx, rax
        movzx   eax, byte ptr [r14 + rbx]
        and     eax, 3
        jmp     qword ptr [8*rax + .LJTI0_0]
.LBB0_2:
        call    foo2@PLT
        mov     eax, 2
        add     rbx, rax
        movzx   eax, byte ptr [r14 + rbx]
        and     eax, 3
        jmp     qword ptr [8*rax + .LJTI0_0]
.LBB0_3:
        call    foo3@PLT
        mov     eax, 3
        add     rbx, rax
        movzx   eax, byte ptr [r14 + rbx]
        and     eax, 3
        jmp     qword ptr [8*rax + .LJTI0_0]
.LBB0_4:
        add     rsp, 8
        pop     rbx
        pop     r14
        ret
.LJTI0_0:
        .quad   .LBB0_1
        .quad   .LBB0_2
        .quad   .LBB0_3
        .quad   .LBB0_4
@fubark fubark added the bug Observed behavior contradicts documented or intended behavior label Jan 24, 2023
@Vexu Vexu added optimization upstream An issue with a third party project that Zig uses. arch-aarch64 64-bit ARM and removed bug Observed behavior contradicts documented or intended behavior labels Jan 25, 2023
@Vexu Vexu added this to the 1.0.0 milestone Jan 25, 2023
@ifreund
Copy link
Member

ifreund commented Jan 28, 2023

Related: #8220

@xdBronch
Copy link
Contributor

xdBronch commented Sep 9, 2024

looks like #21257 fixed this
https://godbolt.org/z/31EGc8sz9

@Vexu Vexu closed this as completed Sep 9, 2024
@Vexu Vexu removed this from the 1.0.0 milestone Sep 9, 2024
@andrewrk andrewrk added this to the 0.14.0 milestone Sep 10, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
arch-aarch64 64-bit ARM optimization upstream An issue with a third party project that Zig uses.
Projects
None yet
Development

No branches or pull requests

5 participants