Skip to content

SIMD intrinsics often fail to inline #53069

Closed
@pcwalton

Description

@pcwalton

Here's my code:

/// Benchmarks a short SSE integer sequence (per the function name, a Paeth
/// predictor) on one fixed set of inputs.
///
/// Three groups of four 16-bit values (`a`, `b`, `c`) are packed into two
/// 128-bit vectors once, outside the timed closure. Each call of the closure
/// passed to `bench.iter` then runs the vector sequence 1000 times.
///
/// NOTE(review): `x86` is presumably an alias for `core::arch::x86_64` (or
/// `x86`) declared outside this snippet — confirm.
#[bench]
fn simd_paeth_1(bench: &mut Bencher) {
    // Test data: the twelve constants below are the leading hexadecimal digits
    // of pi (0x3.243F6A88_85A308D3_13198A2...), split into bytes.
    //
    // SAFETY (review note): nothing visible here checks CPU support. Besides
    // SSE2, `_mm_abs_epi16` requires SSSE3 and `_mm_blendv_epi8` requires
    // SSE4.1 — confirm the benchmark only runs on hardware that has them.
    unsafe {
        let a = [0x32, 0x43, 0xf6, 0xa8];
        let b = [0x88, 0x5a, 0x30, 0x8d];
        let c = [0x31, 0x31, 0x98, 0xa2];
        // `_mm_set_epi16` takes its arguments from the highest lane down, so
        // `ba` holds `a` in 16-bit lanes 0..=3 and `b` in lanes 4..=7.
        let ba = x86::_mm_set_epi16(b[3], b[2], b[1], b[0], a[3], a[2], a[1], a[0]);
        // `cc` holds `c` in both the low and the high four lanes.
        let cc = x86::_mm_set_epi16(c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]);
        // Every lane set to -1; not referenced again in this snippet.
        let all_ones = x86::_mm_set1_epi16(-1);
        bench.iter(|| {
            // The inputs, the loop counter and the final vector are all handed
            // to `black_box`, presumably to keep the optimizer from folding
            // the work away.
            test::black_box(a);
            test::black_box(b);
            test::black_box(c);
            for i in 0..1000 {
                test::black_box(i);

                // Compute signed distances.
                // `spapb`: low lanes = a - c, high lanes = b - c.
                let spapb = x86::_mm_sub_epi16(ba, cc);
                let spbpa = x86::_mm_shuffle_epi32(spapb, 0b01001110);    // swap the two 64-bit halves (dword order 2,3,0,1)
                // Sum of both orderings: (a - c) + (b - c) in every lane.
                let spcpc = x86::_mm_add_epi16(spbpa, spapb);

                // Compute absolute distances.
                let papb = x86::_mm_abs_epi16(spapb);
                let pcpc = x86::_mm_abs_epi16(spcpc);
                // Same half swap as above, applied to the absolute values.
                let pbpa = x86::_mm_shuffle_epi32(papb, 0b01001110);

                // Compute minima (signed, per 16-bit lane).
                let min_bc = x86::_mm_min_epi16(papb, pcpc);
                let min_abc = x86::_mm_min_epi16(pbpa, min_bc);

                // Choose b or c.
                // Mask lanes are all-ones where `papb` equals `min_bc`.
                let pick_b_or_c = x86::_mm_cmpeq_epi16(papb, min_bc);
                // Shadows the array `b` declared above. `_mm_slli_si128` shifts
                // the whole register left by 4 bytes (two 16-bit lanes),
                // filling with zeros.
                // NOTE(review): a 4-byte left shift does not line `b` up with
                // either half of the register — confirm the intended
                // shift direction and amount.
                let b = x86::_mm_slli_si128(ba, 4);
                // `_mm_blendv_epi8(x, y, mask)` takes bytes from `y` where the
                // mask byte's top bit is set and from `x` elsewhere.
                let b_or_c = x86::_mm_blendv_epi8(b, cc, pick_b_or_c);

                // Choose a if necessary.
                // Mask lanes are all-ones where `pbpa` equals `min_abc`; the
                // blend takes `b_or_c` there and `ba` elsewhere.
                let pick_a = x86::_mm_cmpeq_epi16(pbpa, min_abc);
                let result = x86::_mm_blendv_epi8(ba, b_or_c, pick_a);
                test::black_box(result);
            }
        });
    }
}

Note that `_mm_blendv_epi8` fails to inline, ruining performance.

This happens a lot and it makes using SIMD intrinsics very annoying. I have to start using inline asm.

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-SIMD — Area: SIMD (Single Instruction Multiple Data); I-slow — Issue: Problems and improvements with respect to performance of generated code.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions