Open
Description
I decided to look at the generated assembly for the left_child function (inlining everything it calls):
example::left_child:
push rbp
mov rbp, rsp
xor eax, eax
test dil, 1
jne .LBB0_2
mov rdx, rdi
pop rbp
ret
.LBB0_2:
mov rdx, rdi
mov rcx, rdx
.LBB0_3:
shr rcx
add rax, 1
test dl, 2
mov rdx, rcx
jne .LBB0_3
test rax, rax
je .LBB0_5
lea ecx, [rax + 1]
shr rdi, cl
add rdi, rdi
lea rdx, [rax - 1]
mov ecx, eax
shl rdi, cl
mov esi, 1
mov ecx, edx
shl rsi, cl
mov eax, 1
add rsi, -1
or rdi, rsi
mov rdx, rdi
pop rbp
ret
.LBB0_5:
mov eax, 1
mov rdx, rdi
pop rbp
ret
By rewriting it to store each level in its entirety consecutively (level 1, then 2, then 3, etc), I got this (since then changed, will update later, but wasn't much longer):
example::left_child:
push rbp
mov rbp, rsp
lea rax, [rdi + rdi]
add rax, -1
pop rbp
ret
I understand that the algorithm is taken from the JS project, but I still think that this algorithm is quite slow...
Metadata
Metadata
Assignees
Labels
No labels