Skip to content

rustc cannot inline a simple for-loop when Arm NEON is enabled #102220

Open
@Nugine

Description

@Nugine

I tried this code:

https://rust.godbolt.org/z/PfGvMaK4f

#![feature(stdsimd)]
#![feature(arm_target_feature)]

/// Adds two 32-byte arrays element by element and returns the 32 sums.
///
/// The function is marked `#[inline(always)]`, so its body (including the
/// `for` loop) is expected to be inlined into every caller.
///
/// # Panics
///
/// Uses the plain `+` operator on `u8`, which panics on overflow when
/// overflow checks are enabled (the default for debug builds) and wraps
/// otherwise.
#[inline(always)]
fn u8x32_add(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
    // Output starts zeroed; every element is overwritten by the loop below.
    let mut c = [0; 32];
    // Index-based loop over a `Range<usize>`; each access `a[i]`, `b[i]`,
    // `c[i]` is a checked array index.
    for i in 0..32 {
        c[i] = a[i] + b[i];
    }
    c
}

/// Public entry point that forwards both arrays to `u8x32_add` from a
/// function compiled with the `neon` target feature enabled.
///
/// # Safety
///
/// This function is declared with `#[target_feature(enable = "neon")]`, so
/// the caller must ensure the CPU it runs on supports NEON.
#[target_feature(enable = "neon")]
pub unsafe fn test(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
    // The helper is `#[inline(always)]`; its loop is expected to end up here.
    u8x32_add(a, b)
}
<core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next:
        ldm     r0, {r1, r3}
        mov     r2, #0
        cmp     r1, r3
        addlo   r2, r1, #1
        strlo   r2, [r0]
        movlo   r2, #1
        mov     r0, r2
        bx      lr

example::test:
        push    {r4, r5, r6, r7, r11, lr}
        sub     sp, sp, #72
        vld1.8  {d16, d17}, [r1]!
        mov     r6, sp
        mov     r4, r0
        mov     r0, r6
        vst1.64 {d16, d17}, [r0]!
        vmov.i32        q10, #0x0
        add     r7, sp, #32
        add     r5, sp, #64
        vld1.8  {d18, d19}, [r1]
        vst1.64 {d18, d19}, [r0]
        mov     r0, r4
        vst1.8  {d20, d21}, [r0]!
        vld1.8  {d16, d17}, [r2]!
        vst1.8  {d20, d21}, [r0]
        mov     r0, r7
        vld1.8  {d18, d19}, [r2]
        vst1.64 {d16, d17}, [r0]!
        vst1.64 {d18, d19}, [r0]
        mov     r0, #32
        str     r0, [sp, #68]
        mov     r0, #0
        str     r0, [sp, #64]
        mov     r0, r5
        bl      <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next
        cmp     r0, #0
        beq     .LBB1_3
.LBB1_1:
        cmp     r1, #31
        bhi     .LBB1_4
        ldrb    r0, [r6, r1]
        ldrb    r2, [r7, r1]
        add     r0, r2, r0
        strb    r0, [r4, r1]
        mov     r0, r5
        bl      <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next
        cmp     r0, #0
        bne     .LBB1_1
.LBB1_3:
        add     sp, sp, #72
        pop     {r4, r5, r6, r7, r11, pc}
.LBB1_4:
        ldr     r2, .LCPI1_0
        mov     r0, r1
        mov     r1, #32
.LPC1_0:
        add     r2, pc, r2
        bl      core::panicking::panic_bounds_check
        .inst   0xe7ffdefe
.LCPI1_0:
        .long   .Lanon.099e68e15d4586f73c51628389ff76e1.1-(.LPC1_0+8)

.Lanon.099e68e15d4586f73c51628389ff76e1.0:
        .ascii  "/app/example.rs"

.Lanon.099e68e15d4586f73c51628389ff76e1.1:
        .long   .Lanon.099e68e15d4586f73c51628389ff76e1.0
        .asciz  "\017\000\000\000\017\000\000\000\005\000\000"

I expected to see this happen: The for-loop should be inlined.

Instead, this happened: I see the symbol <core::ops::range::Range<T> as core::iter::range::RangeIteratorImpl>::spec_next in the asm output.

Meta

rustc --version --verbose:

rustc 1.66.0-nightly (e7119a030 2022-09-22)
binary: rustc
commit-hash: e7119a0300b87a3d670408ee8e847c6821b3ae80
commit-date: 2022-09-22
host: x86_64-unknown-linux-gnu
release: 1.66.0-nightly
LLVM version: 15.0.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-SIMD — Area: SIMD (Single Instruction Multiple Data)
    A-autovectorization — Area: Autovectorization, which can impact perf or code size
    A-codegen — Area: Code generation
    A-target-feature — Area: Enabling/disabling target features like AVX, Neon, etc.
    C-bug — Category: This is a bug.
    O-Arm — Target: 32-bit Arm processors (armv6, armv7, thumb...), including 64-bit Arm in AArch32 state
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions