Closed
Description
Zig Version
0.11.0-dev.1436+59d9afcb5
Steps to Reproduce and Observed Behavior
On Compiler Explorer, compile the following program with the arguments `-O ReleaseFast -target aarch64-macos.12-none`:
// Externally defined functions with no arguments and no result; eval calls
// one of them per decoded opcode. Their definitions are not part of this
// reproducer, so each call remains a real call in the generated code.
extern fn foo() void;
extern fn foo2() void;
extern fn foo3() void;
// Opcodes decoded by eval. No explicit tag values are given, so the fields
// are numbered 0..3 in declaration order (add = 0, sub = 1, mul = 2, ret = 3).
const Op = enum {
add,
sub,
mul,
ret,
};
// Minimal bytecode-interpreter loop used as the reproducer.
// Each iteration reads the byte at insts[pc], converts it to an Op and
// dispatches on it: .add/.sub/.mul call foo/foo2/foo3 and advance pc by
// 1/2/3 respectively; .ret returns.
// insts is a many-item pointer without a length, so nothing here bounds-checks
// pc; the loop only ends when a .ret opcode is decoded.
export fn eval(insts: [*]const u8) void {
var pc: usize = 0;
while (true) {
// Dispatch point: every `continue` below comes back here, and this is the
// switch whose lowering differs between aarch64 and x86_64.
switch (@intToEnum(Op, insts[pc])) {
.add => {
foo();
pc += 1;
continue;
},
.sub => {
foo2();
pc += 2;
continue;
},
.mul => {
foo3();
pc += 3;
continue;
},
.ret => {
return;
},
}
}
}
Generates the following:
// AArch64 output for eval (annotations added; instructions are unchanged).
// Register roles in this listing:
//   x19 = insts (copied from x0), x20 = pc, x21 = address of jump table LJTI0_0.
// x22 is saved and restored as the pair partner of x21 but is not otherwise
// referenced here.
_eval:
// Prologue: allocate a 48-byte frame and save the callee-saved pairs plus x29/x30.
stp x22, x21, [sp, #-48]!
stp x20, x19, [sp, #16]
stp x29, x30, [sp, #32]
mov x19, x0
// pc = 0
mov x20, xzr
adrp x21, LJTI0_0@PAGE
add x21, x21, LJTI0_0@PAGEOFF
b LBB0_2
// Table index 2: call _foo3, pc += 3, then fall through into the dispatch block.
LBB0_1:
bl _foo3
mov w8, #3
add x20, x20, x8
// Shared dispatch block: the only place the next opcode is loaded and the only
// indirect branch in the function.
LBB0_2:
// w8 = insts[pc]
ldrb w8, [x19, x20]
// Keep the low two bits so the index stays within the 4-entry table.
and x8, x8, #0x3
// x9 = address of LBB0_1, the base the table offsets are relative to.
adr x9, LBB0_1
// w10 = table entry: distance from LBB0_1 in 4-byte instruction units.
ldrb w10, [x21, x8]
// target = LBB0_1 + entry * 4
add x9, x9, x10, lsl #2
br x9
// Table index 0: call _foo, pc += 1, then branch back to the shared dispatch.
LBB0_3:
bl _foo
mov w8, #1
add x20, x20, x8
b LBB0_2
// Table index 1: call _foo2, pc += 2, then branch back to the shared dispatch.
LBB0_4:
bl _foo2
mov w8, #2
add x20, x20, x8
b LBB0_2
// Table index 3: epilogue; restore the saved registers, release the frame, return.
LBB0_5:
ldp x29, x30, [sp, #32]
ldp x20, x19, [sp, #16]
ldp x22, x21, [sp], #48
ret
// Jump table: one byte per opcode value 0..3, each holding
// (target - LBB0_1) >> 2, i.e. the offset from LBB0_1 in instructions.
LJTI0_0:
.byte (LBB0_3-LBB0_1)>>2
.byte (LBB0_4-LBB0_1)>>2
.byte (LBB0_1-LBB0_1)>>2
.byte (LBB0_5-LBB0_1)>>2
Expected Behavior
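I expected the codegen to be similar to x86_64, which duplicates the dispatch sequence (load the next opcode, mask it, and jump indirectly through the jump table) at the end of every switch case. Instead, on aarch64 each case branches back to a single shared dispatch block (`LBB0_2`), which performs the table lookup and the indirect branch, so dispatching an instruction costs an extra direct branch (only the `.mul` case falls through into the dispatch block). Here is the x86_64 output for comparison: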
# x86_64 output for eval, Intel syntax (annotations added; instructions are unchanged).
# Register roles in this listing: r14 = insts (copied from rdi), rbx = pc.
# Every case ends with its own copy of the dispatch sequence
# (movzx / and / jmp through .LJTI0_0) instead of jumping to a shared one.
eval:
push r14
push rbx
# NOTE(review): presumably pushed only to keep the stack 16-byte aligned at the
# calls; the value is never popped back (the epilogue uses add rsp, 8).
push rax
mov r14, rdi
# pc = 0
xor ebx, ebx
# Initial dispatch: eax = insts[pc] & 3, then indirect jump via the table.
movzx eax, byte ptr [rdi + rbx]
and eax, 3
jmp qword ptr [8*rax + .LJTI0_0]
# Table index 0: call foo, pc += 1, dispatch again in place.
.LBB0_1:
call foo@PLT
mov eax, 1
add rbx, rax
movzx eax, byte ptr [r14 + rbx]
and eax, 3
jmp qword ptr [8*rax + .LJTI0_0]
# Table index 1: call foo2, pc += 2, dispatch again in place.
.LBB0_2:
call foo2@PLT
mov eax, 2
add rbx, rax
movzx eax, byte ptr [r14 + rbx]
and eax, 3
jmp qword ptr [8*rax + .LJTI0_0]
# Table index 2: call foo3, pc += 3, dispatch again in place.
.LBB0_3:
call foo3@PLT
mov eax, 3
add rbx, rax
movzx eax, byte ptr [r14 + rbx]
and eax, 3
jmp qword ptr [8*rax + .LJTI0_0]
# Table index 3: epilogue; drop the rax slot, restore rbx and r14, return.
.LBB0_4:
add rsp, 8
pop rbx
pop r14
ret
# Jump table: one 8-byte code address per opcode value 0..3.
.LJTI0_0:
.quad .LBB0_1
.quad .LBB0_2
.quad .LBB0_3
.quad .LBB0_4