Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
fix simd_bitmask return type for non-power-of-two inputs, and add tests
  • Loading branch information
RalfJung committed Jul 1, 2024
commit e9dd39cda49c260213f73af4b0ba05cecb292b39
16 changes: 8 additions & 8 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1121,8 +1121,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
if name == sym::simd_select_bitmask {
let (len, _) = require_simd!(arg_tys[1], SimdArgument);

let expected_int_bits = (len.max(8) - 1).next_power_of_two();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why the old code did -1 here. next_power_of_two is already idempotent; when the input is a power of 2 it is returned unchanged.

let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
let expected_int_bits = len.max(8).next_power_of_two();
let expected_bytes = len.div_ceil(8);

let mask_ty = arg_tys[0];
let mask = match mask_ty.kind() {
Expand Down Expand Up @@ -1379,17 +1379,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}

if name == sym::simd_bitmask {
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
// vector mask and returns the most significant bit (MSB) of each lane in the form
// of either:
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
// returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
// * an unsigned integer
// * an array of `u8`
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
//
// The bit order of the result depends on the byte endianness, LSB-first for little
// endian and MSB-first for big endian.
let expected_int_bits = in_len.max(8);
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
let expected_int_bits = in_len.max(8).next_power_of_two();
let expected_bytes = in_len.div_ceil(8);

// Integer vector <i{in_bitwidth} x in_len>:
let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
Expand All @@ -1409,7 +1408,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}),
};

// Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
// LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
// `pmovmskb` and similar on x86.
let shift_indices =
vec![
bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
Expand Down
90 changes: 90 additions & 0 deletions tests/ui/simd/simd-bitmask-notpow2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//@run-pass
// SEGFAULTS on LLVM 17. This should be merged into `simd-bitmask` once we require LLVM 18.
//@ min-llvm-version: 18
// FIXME: broken codegen on big-endian (https://github.com/rust-lang/rust/issues/127205)
//@ ignore-endian-big
#![feature(repr_simd, intrinsics)]

extern "rust-intrinsic" {
fn simd_bitmask<T, U>(v: T) -> U;
fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
}

fn main() {
// Non-power-of-2 multi-byte mask.
#[repr(simd, packed)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, PartialEq)]
struct i32x10([i32; 10]);
impl i32x10 {
fn splat(x: i32) -> Self {
Self([x; 10])
}
}
unsafe {
let mask = i32x10([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]);
let mask_bits = if cfg!(target_endian = "little") { 0b0101001011 } else { 0b1101001010 };
let mask_bytes =
if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] };

let bitmask1: u16 = simd_bitmask(mask);
let bitmask2: [u8; 2] = simd_bitmask(mask);
assert_eq!(bitmask1, mask_bits);
assert_eq!(bitmask2, mask_bytes);

let selected1 = simd_select_bitmask::<u16, _>(
mask_bits,
i32x10::splat(!0), // yes
i32x10::splat(0), // no
);
let selected2 = simd_select_bitmask::<[u8; 2], _>(
mask_bytes,
i32x10::splat(!0), // yes
i32x10::splat(0), // no
);
assert_eq!(selected1, mask);
assert_eq!(selected2, mask);
}

// Test for a mask where the next multiple of 8 is not a power of two.
#[repr(simd, packed)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, PartialEq)]
struct i32x20([i32; 20]);
impl i32x20 {
fn splat(x: i32) -> Self {
Self([x; 20])
}
}
unsafe {
let mask = i32x20([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0, 0, 0, 0, !0, !0, !0, !0, !0, !0, !0]);
let mask_bits = if cfg!(target_endian = "little") {
0b11111110000101001011
} else {
0b11010010100001111111
};
let mask_bytes = if cfg!(target_endian = "little") {
[0b01001011, 0b11100001, 0b1111]
} else {
[0b1101, 0b00101000, 0b01111111]
};

let bitmask1: u32 = simd_bitmask(mask);
let bitmask2: [u8; 3] = simd_bitmask(mask);
assert_eq!(bitmask1, mask_bits);
assert_eq!(bitmask2, mask_bytes);

let selected1 = simd_select_bitmask::<u32, _>(
mask_bits,
i32x20::splat(!0), // yes
i32x20::splat(0), // no
);
let selected2 = simd_select_bitmask::<[u8; 3], _>(
mask_bytes,
i32x20::splat(!0), // yes
i32x20::splat(0), // no
);
assert_eq!(selected1, mask);
assert_eq!(selected2, mask);
}
}
45 changes: 33 additions & 12 deletions tests/ui/simd/simd-bitmask.rs
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW there are two more test files that cover these intrinsics, "generic-bitmask-pass.rs" and "generic-select-pass.rs". I've only made this one file big-endian-compatible.

Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//@run-pass
//@ignore-endian-big behavior of simd_select_bitmask is endian-specific
#![feature(repr_simd, intrinsics)]

extern "rust-intrinsic" {
Expand All @@ -17,36 +16,58 @@ fn main() {
let i: u8 = simd_bitmask(v);
let a: [u8; 1] = simd_bitmask(v);

assert_eq!(i, 0b0101);
assert_eq!(a, [0b0101]);
if cfg!(target_endian = "little") {
assert_eq!(i, 0b0101);
assert_eq!(a, [0b0101]);
} else {
assert_eq!(i, 0b1010);
assert_eq!(a, [0b1010]);
}

let v = Simd::<i8, 16>([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]);
let i: u16 = simd_bitmask(v);
let a: [u8; 2] = simd_bitmask(v);

assert_eq!(i, 0b0101000000001100);
assert_eq!(a, [0b1100, 0b01010000]);
if cfg!(target_endian = "little") {
assert_eq!(i, 0b0101000000001100);
assert_eq!(a, [0b00001100, 0b01010000]);
} else {
assert_eq!(i, 0b0011000000001010);
assert_eq!(a, [0b00110000, 0b00001010]);
}
}

unsafe {
let a = Simd::<i32, 8>([0, 1, 2, 3, 4, 5, 6, 7]);
let b = Simd::<i32, 8>([8, 9, 10, 11, 12, 13, 14, 15]);
let e = [0, 9, 2, 11, 12, 13, 14, 15];
let a = Simd::<i32, 4>([0, 1, 2, 3]);
let b = Simd::<i32, 4>([8, 9, 10, 11]);
let e = [0, 9, 2, 11];

let r = simd_select_bitmask(0b0101u8, a, b);
let mask = if cfg!(target_endian = "little") { 0b0101u8 } else { 0b1010u8 };
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let r = simd_select_bitmask([0b0101u8], a, b);
let mask = if cfg!(target_endian = "little") { [0b0101u8] } else { [0b1010u8] };
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let a = Simd::<i32, 16>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let b = Simd::<i32, 16>([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31];

let r = simd_select_bitmask(0b0101000000001100u16, a, b);
let mask = if cfg!(target_endian = "little") {
0b0101000000001100u16
} else {
0b0011000000001010u16
};
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let r = simd_select_bitmask([0b1100u8, 0b01010000u8], a, b);
let mask = if cfg!(target_endian = "little") {
[0b00001100u8, 0b01010000u8]
} else {
[0b00110000u8, 0b00001010u8]
};
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);
}
}