Skip to content

Array bound test with two simple loops #102136

Open
@leonardo-m

Description

@leonardo-m

LLVM seems unable to see that the N*M indices generated by the two nested loops all fit inside the array, so it does not remove all of the bounds checks (https://rust.godbolt.org/z/x1YMo93Pv):

// Loop trip counts; the array taken by `foo` has N * M = 3000 elements.
const N: usize = 50;
const M: usize = 60;

/// Minimal reproducer: stores every index into its own slot (`arr[i] = i`)
/// by walking a flat counter through two nested counting loops.
///
/// The loop body runs exactly `N * M` times and `pos` is used before it is
/// incremented, so the largest index ever used is `N * M - 1` and the
/// indexing below can never panic. The bounds check is therefore redundant.
pub fn foo(arr: &mut [usize; N * M]) {
    // Flat index into `arr`, advanced once per inner-loop iteration.
    let mut pos = 0;
    for _ in 0 .. N {
        for _ in 0 .. M {
            // Bounds-checked indexing: this is the check the report is about.
            arr[pos] = pos;
            pos += 1;
        }
    }
}

Gives the asm:

# Constant pool: fourteen tables of four consecutive 64-bit integers each.
# Together they enumerate the lane offsets 0..=55; `foo` loads each table
# into a ymm register once, before its loop.
.LCPI0_0:
        .quad   0
        .quad   1
        .quad   2
        .quad   3
.LCPI0_1:
        .quad   4
        .quad   5
        .quad   6
        .quad   7
.LCPI0_2:
        .quad   8
        .quad   9
        .quad   10
        .quad   11
.LCPI0_3:
        .quad   12
        .quad   13
        .quad   14
        .quad   15
.LCPI0_4:
        .quad   16
        .quad   17
        .quad   18
        .quad   19
.LCPI0_5:
        .quad   20
        .quad   21
        .quad   22
        .quad   23
.LCPI0_6:
        .quad   24
        .quad   25
        .quad   26
        .quad   27
.LCPI0_7:
        .quad   28
        .quad   29
        .quad   30
        .quad   31
.LCPI0_8:
        .quad   32
        .quad   33
        .quad   34
        .quad   35
.LCPI0_9:
        .quad   36
        .quad   37
        .quad   38
        .quad   39
.LCPI0_10:
        .quad   40
        .quad   41
        .quad   42
        .quad   43
.LCPI0_11:
        .quad   44
        .quad   45
        .quad   46
        .quad   47
.LCPI0_12:
        .quad   48
        .quad   49
        .quad   50
        .quad   51
.LCPI0_13:
        .quad   52
        .quad   53
        .quad   54
        .quad   55
# Compiled body of `foo`. rdi holds the pointer to the array.
# Each trip through .LBB0_1 writes 60 consecutive elements: 56 with fourteen
# 4-lane vector stores that carry no bounds check, then 4 with scalar stores
# that are each guarded by a compare against 2999 (the last valid index).
foo:
        # NOTE(review): presumably only keeps the stack aligned for the call
        # on the panic path; the pushed value is never read back.
        push    rax
        # Load the lane-offset tables 0..=55 into ymm14 and ymm1..ymm13.
        vmovdqa ymm14, ymmword ptr [rip + .LCPI0_0]
        vmovdqa ymm1, ymmword ptr [rip + .LCPI0_1]
        vmovdqa ymm2, ymmword ptr [rip + .LCPI0_2]
        vmovdqa ymm3, ymmword ptr [rip + .LCPI0_3]
        vmovdqa ymm4, ymmword ptr [rip + .LCPI0_4]
        vmovdqa ymm5, ymmword ptr [rip + .LCPI0_5]
        vmovdqa ymm6, ymmword ptr [rip + .LCPI0_6]
        vmovdqa ymm7, ymmword ptr [rip + .LCPI0_7]
        vmovdqa ymm8, ymmword ptr [rip + .LCPI0_8]
        vmovdqa ymm9, ymmword ptr [rip + .LCPI0_9]
        vmovdqa ymm10, ymmword ptr [rip + .LCPI0_10]
        vmovdqa ymm11, ymmword ptr [rip + .LCPI0_11]
        vmovdqa ymm12, ymmword ptr [rip + .LCPI0_12]
        vmovdqa ymm13, ymmword ptr [rip + .LCPI0_13]
        # rcx = index of the LAST element of the current 60-element run
        # (59, 119, ..., 2999); the run starts at base = rcx - 59.
        mov     ecx, 59
.LBB0_1:
        # rdx = base (first index of this run); rax = base + 56 (first scalar index).
        lea     rdx, [rcx - 59]
        lea     rax, [rcx - 3]
        # Broadcast base into all four 64-bit lanes of ymm0.
        vmovq   xmm0, rdx
        vpbroadcastq    ymm0, xmm0
        # base is a multiple of 60, so its two low bits are clear and OR with
        # [0,1,2,3] gives the same result as an add.
        vpor    ymm15, ymm14, ymm0
        # Byte offset 8*rcx - 472 == 8*base: stores arr[base .. base+4].
        vmovdqu ymmword ptr [rdi + 8*rcx - 472], ymm15
        # Each following pair computes base + table and stores the next four
        # elements (the displacement grows by 32 bytes per store).
        vpaddq  ymm15, ymm0, ymm1
        vmovdqu ymmword ptr [rdi + 8*rcx - 440], ymm15
        vpaddq  ymm15, ymm0, ymm2
        vmovdqu ymmword ptr [rdi + 8*rcx - 408], ymm15
        vpaddq  ymm15, ymm0, ymm3
        vmovdqu ymmword ptr [rdi + 8*rcx - 376], ymm15
        vpaddq  ymm15, ymm0, ymm4
        vmovdqu ymmword ptr [rdi + 8*rcx - 344], ymm15
        vpaddq  ymm15, ymm0, ymm5
        vmovdqu ymmword ptr [rdi + 8*rcx - 312], ymm15
        vpaddq  ymm15, ymm0, ymm6
        vmovdqu ymmword ptr [rdi + 8*rcx - 280], ymm15
        vpaddq  ymm15, ymm0, ymm7
        vmovdqu ymmword ptr [rdi + 8*rcx - 248], ymm15
        vpaddq  ymm15, ymm8, ymm0
        vmovdqu ymmword ptr [rdi + 8*rcx - 216], ymm15
        vpaddq  ymm15, ymm9, ymm0
        vmovdqu ymmword ptr [rdi + 8*rcx - 184], ymm15
        vpaddq  ymm15, ymm10, ymm0
        vmovdqu ymmword ptr [rdi + 8*rcx - 152], ymm15
        vpaddq  ymm15, ymm11, ymm0
        vmovdqu ymmword ptr [rdi + 8*rcx - 120], ymm15
        vpaddq  ymm15, ymm12, ymm0
        vpaddq  ymm0, ymm13, ymm0
        vmovdqu ymmword ptr [rdi + 8*rcx - 88], ymm15
        # Last vector store: arr[base+52 .. base+56].
        vmovdqu ymmword ptr [rdi + 8*rcx - 56], ymm0
        # Scalar tail: the remaining four elements, each behind its own bounds
        # check. `ja` is an unsigned compare, so it branches when index > 2999.
        # arr[base+56]
        cmp     rax, 2999
        ja      .LBB0_6
        mov     qword ptr [rdi + 8*rcx - 24], rax
        # arr[base+57]
        lea     rax, [rcx - 2]
        cmp     rax, 2999
        ja      .LBB0_6
        mov     qword ptr [rdi + 8*rcx - 16], rax
        # arr[base+58]
        lea     rax, [rcx - 1]
        cmp     rax, 2999
        ja      .LBB0_6
        mov     qword ptr [rdi + 8*rcx - 8], rax
        # arr[base+59]
        cmp     rcx, 2999
        ja      .LBB0_5
        mov     qword ptr [rdi + 8*rcx], rcx
        # Advance to the next run; rcx goes 59, 119, ... and the loop exits
        # when it reaches 3059 = 59 + 50*60, i.e. after 50 trips.
        add     rcx, 60
        cmp     rcx, 3059
        jne     .LBB0_1
        # Normal return: undo the push and clear the upper ymm halves.
        pop     rax
        vzeroupper
        ret
.LBB0_5:
        # Failing index was in rcx; move it to rax so both paths share .LBB0_6.
        mov     rax, rcx
.LBB0_6:
        # Bounds-check failure: call panic_bounds_check with rdi = failing
        # index and esi = 3000 (the array length).
        # NOTE(review): rdx presumably points at the caller source location.
        lea     rdx, [rip + .Lanon.104192cc768634eeeb2fcfd019dd0f1b.1]
        mov     esi, 3000
        mov     rdi, rax
        vzeroupper
        call    qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
        # Trap placed after the call; the compiler does not expect it to return.
        ud2

Using:

rustc 1.66.0-nightly (432abd86f 2022-09-20)
binary: rustc
commit-hash: 432abd86f231c908f6df3cdd779e83f35084be90
commit-date: 2022-09-20
host: x86_64-pc-windows-gnu
release: 1.66.0-nightly
LLVM version: 15.0.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM (Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.)
    A-array (Area: `[T; N]`)
    C-bug (Category: This is a bug.)
    I-slow (Issue: Problems and improvements with respect to performance of generated code.)
    T-compiler (Relevant to the compiler team, which will review and decide on the PR/issue.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions