Skip to content

Commit 344a219

Browse files
authored
Merge pull request #3383 from akirilov-arm/vany_true
Cranelift AArch64: Fix the VanyTrue implementation for 64-bit elements
2 parents 26ef512 + 0fb3acf commit 344a219

File tree

4 files changed, with 83 additions and 9 deletions.

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2338,7 +2338,11 @@ impl MachInstEmit for Inst {
23382338
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
23392339
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
23402340
VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
2341-
VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
2341+
VecALUOp::Umaxp => {
2342+
debug_assert_ne!(size, VectorSize::Size64x2);
2343+
2344+
(0b001_01110_00_1 | enc_size << 1, 0b101001)
2345+
}
23422346
VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
23432347
VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
23442348
VecALUOp::Mul => {

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2397,7 +2397,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
23972397
// cmp xm, #0
23982398
// cset xm, ne
23992399

2400-
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
2400+
let s = VectorSize::from_ty(src_ty);
2401+
let size = if s == VectorSize::Size64x2 {
2402+
// `vall_true` with 64-bit elements is handled elsewhere.
2403+
debug_assert_ne!(op, Opcode::VallTrue);
2404+
2405+
VectorSize::Size32x4
2406+
} else {
2407+
s
2408+
};
24012409

24022410
if op == Opcode::VanyTrue {
24032411
ctx.emit(Inst::VecRRR {

cranelift/filetests/filetests/runtests/simd-logical.clif

Lines changed: 68 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@ target aarch64
33
; target s390x TODO: Not yet implemented on s390x
44
set enable_simd
55
target x86_64 machinst
6-
set enable_simd
7-
target x86_64 legacy skylake
86

97
function %bnot() -> b32 {
108
block0:
@@ -26,13 +24,77 @@ block0:
2624
}
2725
; run
2826

29-
function %vany_true_i16x8() -> b1 {
27+
function %vany_true_i8x16() -> b1, b1 {
3028
block0:
31-
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
29+
v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
3230
v1 = vany_true v0
33-
return v1
31+
32+
v2 = vconst.i8x16 [0 0 0 1 0 0 0 0 0 0 42 0 0 0 0 0]
33+
v3 = vany_true v2
34+
35+
return v1, v3
3436
}
35-
; run
37+
; run: %vany_true_i8x16() == [false, true]
38+
39+
function %vany_true_i16x8() -> b1, b1 {
40+
block0:
41+
v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
42+
v1 = vany_true v0
43+
44+
v2 = vconst.i16x8 [0 0 42 0 0 0 0 0]
45+
v3 = vany_true v2
46+
47+
return v1, v3
48+
}
49+
; run: %vany_true_i16x8() == [false, true]
50+
51+
function %vany_true_i32x4() -> b1, b1 {
52+
block0:
53+
v0 = vconst.i32x4 [0 0 0 0]
54+
v1 = vany_true v0
55+
56+
v2 = vconst.i32x4 [0 42 0 0]
57+
v3 = vany_true v2
58+
59+
return v1, v3
60+
}
61+
; run: %vany_true_i32x4() == [false, true]
62+
63+
function %vany_true_i64x2() -> b1, b1 {
64+
block0:
65+
v0 = vconst.i64x2 [0 0]
66+
v1 = vany_true v0
67+
68+
v2 = vconst.i64x2 [0 1]
69+
v3 = vany_true v2
70+
71+
return v1, v3
72+
}
73+
; run: %vany_true_i64x2() == [false, true]
74+
75+
function %vany_true_f32x4() -> b1, b1 {
76+
block0:
77+
v0 = vconst.f32x4 [0.0 0.0 0.0 0.0]
78+
v1 = vany_true v0
79+
80+
v2 = vconst.f32x4 [0.0 0x4.2 0.0 0.0]
81+
v3 = vany_true v2
82+
83+
return v1, v3
84+
}
85+
; run: %vany_true_f32x4() == [false, true]
86+
87+
function %vany_true_f64x2() -> b1, b1 {
88+
block0:
89+
v0 = vconst.f64x2 [0.0 0.0]
90+
v1 = vany_true v0
91+
92+
v2 = vconst.f64x2 [0.0 0x1.0]
93+
v3 = vany_true v2
94+
95+
return v1, v3
96+
}
97+
; run: %vany_true_f64x2() == [false, true]
3698

3799
function %vany_true_b32x4() -> b1 {
38100
block0:

cranelift/filetests/filetests/runtests/simd-vanytrue.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
test interpret
22
test run
3+
target aarch64
34
target x86_64 machinst
4-
; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304
55

66
function %vany_true_b8x16(b8x16) -> b1 {
77
block0(v0: b8x16):

0 commit comments

Comments (0)