SIMD intrinsics often fail to inline

Here's my code:

    /// Micro-benchmark that evaluates a Paeth-style predictor selection (going by the
    /// function name) on four 16-bit samples at a time using 128-bit x86 intrinsics.
    ///
    /// For inputs `a`, `b`, `c` the loop body computes the three distances
    /// `|a - c|`, `|b - c|` and `|a + b - 2c|`, takes their minima, and blends between
    /// the packed inputs according to which distance matched the minimum. The final
    /// vector is handed to `test::black_box` so the work is not optimized away.
    ///
    /// This is the reproduction case for the report: the loop body is meant to compile
    /// to straight-line SIMD code with every intrinsic inlined.
    #[bench]
    fn simd_paeth_1(bench: &mut Bencher) {
        // Test data: the leading hexadecimal digits of pi (3.243f6a88 85a308d3 13198a2...),
        // split into three groups of four bytes.
        //
        // NOTE(review): the whole body sits in one `unsafe` block because the intrinsics
        // are called there. `_mm_abs_epi16` is an SSSE3 instruction and `_mm_blendv_epi8`
        // is SSE4.1 -- presumably the build must enable those target features; confirm.
        unsafe {
            let a = [0x32, 0x43, 0xf6, 0xa8];
            let b = [0x88, 0x5a, 0x30, 0x8d];
            let c = [0x31, 0x31, 0x98, 0xa2];
            // `_mm_set_epi16` takes its arguments highest lane first, so `ba` holds `a` in
            // lanes 0..=3 (low 64 bits) and `b` in lanes 4..=7 (high 64 bits).
            let ba = x86::_mm_set_epi16(b[3], b[2], b[1], b[0], a[3], a[2], a[1], a[0]);
            // `cc` holds `c` in both the low and the high 64-bit half.
            let cc = x86::_mm_set_epi16(c[3], c[2], c[1], c[0], c[3], c[2], c[1], c[0]);
            // NOTE(review): `all_ones` is not referenced anywhere else in this snippet.
            let all_ones = x86::_mm_set1_epi16(-1);
            bench.iter(|| {
                // Only the scalar arrays go through `black_box` here; the packed vectors
                // `ba` and `cc` were already built outside the closure.
                test::black_box(a);
                test::black_box(b);
                test::black_box(c);
                // Repeat the kernel 1000 times per benchmark iteration.
                for i in 0..1000 {
                    test::black_box(i);

                    // Compute signed distances.
                    // spapb: low half = a - c, high half = b - c.
                    let spapb = x86::_mm_sub_epi16(ba, cc);
                    // The immediate 0b01_00_11_10 selects dwords 2,3,0,1, i.e. it swaps the
                    // two 64-bit halves: low half = b - c, high half = a - c.
                    let spbpa = x86::_mm_shuffle_epi32(spapb, 0b01001110);    // swap dwords
                    // Both halves now hold (a - c) + (b - c) = a + b - 2c.
                    let spcpc = x86::_mm_add_epi16(spbpa, spapb);

                    // Compute absolute distances.
                    // papb: low half = |a - c|, high half = |b - c|.
                    let papb = x86::_mm_abs_epi16(spapb);
                    // pcpc: |a + b - 2c| in both halves.
                    let pcpc = x86::_mm_abs_epi16(spcpc);
                    // pbpa: `papb` with its 64-bit halves swapped.
                    let pbpa = x86::_mm_shuffle_epi32(papb, 0b01001110);

                    // Compute minima.
                    // min_bc is the lane-wise minimum of papb and pcpc; min_abc additionally
                    // folds in the half-swapped distances, giving the minimum of all three.
                    let min_bc = x86::_mm_min_epi16(papb, pcpc);
                    let min_abc = x86::_mm_min_epi16(pbpa, min_bc);

                    // Choose b or c.
                    // The mask is all-ones in lanes where papb equals min(papb, pcpc),
                    // i.e. where papb is not greater than pcpc.
                    let pick_b_or_c = x86::_mm_cmpeq_epi16(papb, min_bc);
                    // This `b` shadows the outer array `b` for the rest of the iteration.
                    // NOTE(review): `_mm_slli_si128(ba, 4)` shifts the whole register left by
                    // 4 bytes (two 16-bit lanes), shifting in zeros -- confirm this is the
                    // intended lane alignment for `b`.
                    let b = x86::_mm_slli_si128(ba, 4);
                    // `_mm_blendv_epi8` takes bytes from its second operand (`cc`) where the
                    // mask byte's high bit is set, otherwise from its first (`b`).
                    // This is the call the report says is not inlined.
                    let b_or_c = x86::_mm_blendv_epi8(b, cc, pick_b_or_c);

                    // Choose a if necessary.
                    // Mask lanes are all-ones where pbpa equals the overall minimum.
                    let pick_a = x86::_mm_cmpeq_epi16(pbpa, min_abc);
                    // Takes bytes from `b_or_c` where `pick_a` is set, otherwise from `ba`.
                    let result = x86::_mm_blendv_epi8(ba, b_or_c, pick_a);
                    // Keep the result observable so the loop body is not eliminated.
                    test::black_box(result);
                }
            });
        }
    }

Note that `_mm_blendv_epi8` fails to inline, ruining performance.

This happens a lot and it makes using SIMD intrinsics *very* annoying. I have to start using inline asm.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

SIMD intrinsics often fail to inline #53069

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

SIMD intrinsics often fail to inline #53069

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions