Skip to content

Badly optimized small bytes array search #75659

Closed
@leonardo-m

Description

@leonardo-m
type T = u8;

pub fn foo1(x: T, data: &[T; 1]) -> bool {
    data.contains(&x)
}

pub fn foo2(x: T, data: &[T; 2]) -> bool {
    data.contains(&x)
}

pub fn foo3(x: T, data: &[T; 3]) -> bool {
    data.contains(&x)
}

pub fn foo4(x: T, data: &[T; 4]) -> bool {
    data.contains(&x)
}

pub fn foo16(x: T, data: &[T; 16]) -> bool {
    data.contains(&x)
}

When T is u16, u32, u64 or u128 the generated code is reasonable, like for u32 (rustc 1.47.0-nightly 7e6d6e5 2020-08-16, using -C opt-level=3 and more):

example::foo1:
        cmp     dword ptr [rsi], edi
        sete    al
        ret

example::foo2:
        cmp     dword ptr [rsi], edi
        sete    cl
        cmp     dword ptr [rsi + 4], edi
        sete    al
        or      al, cl
        ret

example::foo3:
        cmp     dword ptr [rsi], edi
        jne     .LBB2_2
        mov     al, 1
        ret
.LBB2_2:
        cmp     dword ptr [rsi + 4], edi
        sete    cl
        cmp     dword ptr [rsi + 8], edi
        sete    al
        or      al, cl
        ret

example::foo4:
        cmp     dword ptr [rsi], edi
        je      .LBB3_3
        cmp     dword ptr [rsi + 4], edi
        jne     .LBB3_2
.LBB3_3:
        mov     al, 1
        ret
.LBB3_2:
        cmp     dword ptr [rsi + 8], edi
        sete    cl
        cmp     dword ptr [rsi + 12], edi
        sete    al
        or      al, cl
        ret

example::foo16:
        xor     eax, eax
.LBB4_1:
        cmp     rax, 64
        je      .LBB4_2
        cmp     dword ptr [rsi + rax], edi
        lea     rax, [rax + 4]
        jne     .LBB4_1
        mov     al, 1
        ret
.LBB4_2:
        xor     eax, eax
        ret

But when T is u8 or i8 it looks sub-optional for small arrays, because for such tiny arrays the overhead of calling another functions is bad:

example::foo1:
        push    rax
        mov     byte ptr [rsp + 7], dil
        lea     rdi, [rsp + 7]
        mov     edx, 1
        call    qword ptr [rip + <u8 as core::slice::SliceContains>::slice_contains@GOTPCREL]
        pop     rcx
        ret

example::foo2:
        push    rax
        mov     byte ptr [rsp + 7], dil
        lea     rdi, [rsp + 7]
        mov     edx, 2
        call    qword ptr [rip + <u8 as core::slice::SliceContains>::slice_contains@GOTPCREL]
        pop     rcx
        ret

example::foo3:
        push    rax
        mov     byte ptr [rsp + 7], dil
        lea     rdi, [rsp + 7]
        mov     edx, 3
        call    qword ptr [rip + <u8 as core::slice::SliceContains>::slice_contains@GOTPCREL]
        pop     rcx
        ret

example::foo4:
        push    rax
        mov     byte ptr [rsp + 7], dil
        lea     rdi, [rsp + 7]
        mov     edx, 4
        call    qword ptr [rip + <u8 as core::slice::SliceContains>::slice_contains@GOTPCREL]
        pop     rcx
        ret

example::foo16:
        push    rax
        mov     byte ptr [rsp + 7], dil
        lea     rdi, [rsp + 7]
        mov     edx, 16
        call    qword ptr [rip + <u8 as core::slice::SliceContains>::slice_contains@GOTPCREL]
        pop     rcx
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    I-slowIssue: Problems and improvements with respect to performance of generated code.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions