From 9f82f6daa5e470652f4ffced628547d0c24aac2c Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 24 Aug 2024 20:20:23 +0100 Subject: [PATCH 1/3] [ARM] Add a number of extra vmovimm tests for BE. NFC --- llvm/test/CodeGen/ARM/big-endian-vmov.ll | 100 +++ llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 818 ++++++++++++++++++++++- 2 files changed, 900 insertions(+), 18 deletions(-) diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll index 2cb22b4d5fbc26..1cb7a030d58c26 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll @@ -134,3 +134,103 @@ define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() { ; CHECK-NEXT: bx lr ret <1 x i64> } + +define arm_aapcs_vfpcc <2 x i64> @vmov_v2i64_b() { +; CHECK-LABEL: vmov_v2i64_b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i64 q0, #0xffff00ff0000ff +; CHECK-NEXT: bx lr + ret <2 x i64> +} + +define arm_aapcs_vfpcc <4 x i32> @vmov_v4i32_b() { +; CHECK-LE-LABEL: vmov_v4i32_b: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 q0, #0xff0000ff00ffff00 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: vmov_v4i32_b: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 q0, #0xffff00ff0000ff +; CHECK-BE-NEXT: bx lr + ret <4 x i32> +} + +define arm_aapcs_vfpcc <2 x i64> @and_v2i64_b(<2 x i64> %a) { +; CHECK-LABEL: and_v2i64_b: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i64 q8, #0xffff00ff0000ff +; CHECK-NEXT: vand q0, q0, q8 +; CHECK-NEXT: bx lr + %b = and <2 x i64> %a, + ret <2 x i64> %b +} + +define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) { +; CHECK-LE-LABEL: and_v4i32_b: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00 +; CHECK-LE-NEXT: vand q0, q0, q8 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: and_v4i32_b: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff +; CHECK-BE-NEXT: vrev64.32 q9, q0 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vand q8, q9, q8 +; CHECK-BE-NEXT: vrev64.32 q0, q8 +; CHECK-BE-NEXT: bx lr + %b = and <4 x i32> %a, + ret <4 x i32> %b +} + +define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() { +; CHECK-LE-LABEL: vmvn_v16i8_m1: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmvn.i32 q0, #0x10000 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: vmvn_v16i8_m1: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmvn.i32 q0, #0x1 +; CHECK-BE-NEXT: bx lr + ret <8 x i16> +} + +; FIXME: This is incorrect for BE +define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { +; CHECK-LE-LABEL: and_v8i16_m1: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vbic.i32 q0, #0x10000 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: and_v8i16_m1: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vrev64.32 q8, q0 +; CHECK-BE-NEXT: vbic.i32 q8, #0x10000 +; CHECK-BE-NEXT: vrev64.32 q0, q8 +; CHECK-BE-NEXT: bx lr + %b = and <8 x i16> %a, + ret <8 x i16> %b +} + +; FIXME: This is incorrect for BE +define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_m1(<8 x i16> %a) { +; CHECK-LE-LABEL: xor_v8i16_m1: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmvn.i32 q8, #0x10000 +; CHECK-LE-NEXT: veor q0, q0, q8 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: xor_v8i16_m1: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmvn.i32 q8, #0x10000 +; CHECK-BE-NEXT: vrev64.16 q9, q0 +; CHECK-BE-NEXT: vrev32.16 q8, q8 +; CHECK-BE-NEXT: veor q8, q9, q8 +; CHECK-BE-NEXT: vrev64.16 q0, q8 +; CHECK-BE-NEXT: bx lr + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 97abc539557131..729e4c5e89c75e 100644 --- 
a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -12,6 +12,25 @@ entry: ret <16 x i8> } +define arm_aapcs_vfpcc <16 x i8> @xor_int8_1(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int8_1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i8 q1, #0x1 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int8_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i8 q1, #0x1 +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() { ; CHECK-LABEL: mov_int8_m1: ; CHECK: @ %bb.0: @ %entry @@ -21,6 +40,23 @@ entry: ret <16 x i8> } +define arm_aapcs_vfpcc <16 x i8> @xor_int8_m1(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int8_m1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn q0, q0 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int8_m1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vrev64.8 q1, q0 +; CHECKBE-NEXT: vmvn q1, q1 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + ; This has 0x01020304 or 0x04030201 vdup.32'd to q reg depending on endianness. ; The big endian is different as there is an implicit vrev64.8 out of the ; function, which gets constant folded away. @@ -42,6 +78,98 @@ entry: ret <16 x i8> } +define arm_aapcs_vfpcc <16 x i8> @xor_int8_1234(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int8_1234: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: movw r0, #513 +; CHECKLE-NEXT: movt r0, #1027 +; CHECKLE-NEXT: vdup.32 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int8_1234: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: movw r0, #513 +; CHECKBE-NEXT: vrev64.8 q1, q0 +; CHECKBE-NEXT: movt r0, #1027 +; CHECKBE-NEXT: vdup.32 q0, r0 +; CHECKBE-NEXT: veor q1, q1, q0 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + +define arm_aapcs_vfpcc <16 x i8> @mov_int8_32() { +; CHECKLE-LABEL: mov_int8_32: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q0, #0x1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: mov_int8_32: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q0, #0x1000000 +; CHECKBE-NEXT: bx lr +entry: + ret <16 x i8> +} + +; FIXME: This is incorrect for BE +define arm_aapcs_vfpcc <16 x i8> @xor_int8_32(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int8_32: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x1 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int8_32: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x1 +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: vrev32.8 q1, q1 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + +define arm_aapcs_vfpcc <16 x i8> @mov_int8_64() { +; CHECKLE-LABEL: mov_int8_64: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q0, #0xffff00ffff0000ff +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: mov_int8_64: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q0, #0xff0000ffff00ffff +; CHECKBE-NEXT: bx lr +entry: + ret <16 x i8> +} + +define arm_aapcs_vfpcc <16 x i8> @xor_int8_64(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int8_64: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xffff00ffff0000ff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int8_64: +; CHECKBE: 
@ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xff0000ffff00ffff +; CHECKBE-NEXT: vrev64.8 q2, q1 +; CHECKBE-NEXT: vrev64.8 q1, q0 +; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() { ; CHECK-LABEL: mov_int16_1: ; CHECK: @ %bb.0: @ %entry @@ -51,6 +179,25 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> @xor_int16_1(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i16 q1, #0x1 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i16 q1, #0x1 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() { ; CHECK-LABEL: mov_int16_m1: ; CHECK: @ %bb.0: @ %entry @@ -60,6 +207,24 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> @xor_int16_m1(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_m1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn q0, q0 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_m1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i8 q1, #0xff +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() { ; CHECK-LABEL: mov_int16_256: ; CHECK: @ %bb.0: @ %entry @@ -69,6 +234,25 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> @xor_int16_256(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_256: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i16 q1, #0x100 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_256: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i16 q1, #0x100 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int16_257() { ; CHECK-LABEL: mov_int16_257: ; CHECK: @ %bb.0: @ %entry @@ -78,6 +262,25 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> @xor_int16_257(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_257: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i8 q1, #0x1 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_257: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i8 q1, #0x1 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() { ; CHECK-LABEL: mov_int16_258: ; CHECK: @ %bb.0: @ %entry @@ -88,6 +291,97 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> @xor_int16_258(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_258: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: mov.w r0, #258 +; CHECKLE-NEXT: vdup.16 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_258: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: mov.w r0, #258 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: vdup.16 q1, r0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + 
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_32() { +; CHECKLE-LABEL: mov_int16_32: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: movw r0, #257 +; CHECKLE-NEXT: movt r0, #256 +; CHECKLE-NEXT: vdup.32 q0, r0 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: mov_int16_32: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: movw r0, #256 +; CHECKBE-NEXT: movt r0, #257 +; CHECKBE-NEXT: vdup.32 q0, r0 +; CHECKBE-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @xor_int16_32(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_32: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: movw r0, #257 +; CHECKLE-NEXT: movt r0, #256 +; CHECKLE-NEXT: vdup.32 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_32: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: movw r0, #257 +; CHECKBE-NEXT: vrev64.16 q1, q0 +; CHECKBE-NEXT: movt r0, #256 +; CHECKBE-NEXT: vdup.32 q0, r0 +; CHECKBE-NEXT: veor q1, q1, q0 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + +define arm_aapcs_vfpcc <8 x i16> @mov_int16_64() { +; CHECK-LABEL: mov_int16_64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i64 q0, #0xff0000000000ff +; CHECK-NEXT: bx lr +entry: + ret <8 x i16> +} + +define arm_aapcs_vfpcc <8 x i16> @xor_int16_64(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int16_64: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xff0000000000ff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int16_64: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xff0000000000ff +; CHECKBE-NEXT: vrev64.16 q2, q1 +; CHECKBE-NEXT: vrev64.16 q1, q0 +; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() { ; CHECK-LABEL: mov_int32_1: ; CHECK: @ %bb.0: @ %entry @@ -97,6 +391,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_1(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x1 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x1 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() { ; CHECK-LABEL: mov_int32_256: ; CHECK: @ %bb.0: @ %entry @@ -106,6 +419,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_256(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_256: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x100 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_256: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x100 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() { ; CHECK-LABEL: mov_int32_65536: ; CHECK: @ %bb.0: @ %entry @@ -115,6 +447,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_65536(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_65536: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x10000 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_65536: +; 
CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x10000 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() { ; CHECK-LABEL: mov_int32_16777216: ; CHECK: @ %bb.0: @ %entry @@ -124,6 +475,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_16777216(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_16777216: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x1000000 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_16777216: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x1000000 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() { ; CHECK-LABEL: mov_int32_16777217: ; CHECK: @ %bb.0: @ %entry @@ -135,6 +505,29 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_16777217(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_16777217: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: movs r0, #1 +; CHECKLE-NEXT: movt r0, #256 +; CHECKLE-NEXT: vdup.32 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_16777217: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: movs r0, #1 +; CHECKBE-NEXT: vrev64.32 q1, q0 +; CHECKBE-NEXT: movt r0, #256 +; CHECKBE-NEXT: vdup.32 q0, r0 +; CHECKBE-NEXT: veor q1, q1, q0 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() { ; CHECK-LABEL: mov_int32_17919: ; CHECK: @ %bb.0: @ %entry @@ -144,6 +537,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_17919(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_17919: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x45ff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_17919: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x45ff +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() { ; CHECK-LABEL: mov_int32_4587519: ; CHECK: @ %bb.0: @ %entry @@ -153,6 +565,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_4587519(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_4587519: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i32 q1, #0x45ffff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_4587519: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i32 q1, #0x45ffff +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() { ; CHECK-LABEL: mov_int32_m1: ; CHECK: @ %bb.0: @ %entry @@ -162,6 +593,24 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_m1(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_m1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn q0, q0 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_m1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i8 q1, #0xff +; CHECKBE-NEXT: vrev64.32 q2, q0 +; 
CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() { ; CHECK-LABEL: mov_int32_4294901760: ; CHECK: @ %bb.0: @ %entry @@ -171,6 +620,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_4294901760(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_4294901760: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn.i32 q1, #0xffff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_4294901760: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmvn.i32 q1, #0xffff +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() { ; CHECK-LABEL: mov_int32_4278190335: ; CHECK: @ %bb.0: @ %entry @@ -182,6 +650,29 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_4278190335(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_4278190335: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: movs r0, #255 +; CHECKLE-NEXT: movt r0, #65280 +; CHECKLE-NEXT: vdup.32 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_4278190335: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: movs r0, #255 +; CHECKBE-NEXT: vrev64.32 q1, q0 +; CHECKBE-NEXT: movt r0, #65280 +; CHECKBE-NEXT: vdup.32 q0, r0 +; CHECKBE-NEXT: veor q1, q1, q0 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() { ; CHECK-LABEL: mov_int32_4278255615: ; CHECK: @ %bb.0: @ %entry @@ -191,6 +682,25 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_4278255615(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_4278255615: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn.i32 q1, #0xff0000 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_4278255615: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmvn.i32 q1, #0xff0000 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <4 x i32> @mov_int32_16908546() { ; CHECK-LABEL: mov_int32_16908546: ; CHECK: @ %bb.0: @ %entry @@ -201,15 +711,70 @@ entry: ret <4 x i32> } +define arm_aapcs_vfpcc <4 x i32> @xor_int32_16908546(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_16908546: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: mov.w r0, #258 +; CHECKLE-NEXT: vdup.16 q1, r0 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_16908546: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: mov.w r0, #258 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: vdup.16 q1, r0 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + +define arm_aapcs_vfpcc <4 x i32> @mov_int32_64() { +; CHECKLE-LABEL: mov_int32_64: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q0, #0xff00ffff00ff00 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: mov_int32_64: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff0000ff00ff +; CHECKBE-NEXT: bx lr +entry: + ret <4 x i32> +} + +define arm_aapcs_vfpcc <4 x i32> @xor_int32_64(<4 x i32> %a) { +; CHECKLE-LABEL: xor_int32_64: +; CHECKLE: @ 
%bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xff00ffff00ff00 +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int32_64: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff0000ff00ff +; CHECKBE-NEXT: vrev64.32 q2, q1 +; CHECKBE-NEXT: vrev64.32 q1, q0 +; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <4 x i32> %a, + ret <4 x i32> %b +} + define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() { ; CHECKLE-LABEL: mov_int64_1: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI20_0 +; CHECKLE-NEXT: adr r0, .LCPI50_0 ; CHECKLE-NEXT: vldrw.u32 q0, [r0] ; CHECKLE-NEXT: bx lr ; CHECKLE-NEXT: .p2align 4 ; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI20_0: +; CHECKLE-NEXT: .LCPI50_0: ; CHECKLE-NEXT: .long 1 @ double 4.9406564584124654E-324 ; CHECKLE-NEXT: .long 0 ; CHECKLE-NEXT: .long 1 @ double 4.9406564584124654E-324 @@ -217,13 +782,13 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() { ; ; CHECKBE-LABEL: mov_int64_1: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI20_0 +; CHECKBE-NEXT: adr r0, .LCPI50_0 ; CHECKBE-NEXT: vldrb.u8 q1, [r0] ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr ; CHECKBE-NEXT: .p2align 4 ; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI20_0: +; CHECKBE-NEXT: .LCPI50_0: ; CHECKBE-NEXT: .long 0 @ double 4.9406564584124654E-324 ; CHECKBE-NEXT: .long 1 ; CHECKBE-NEXT: .long 0 @ double 4.9406564584124654E-324 @@ -232,13 +797,58 @@ entry: ret <2 x i64> } +define arm_aapcs_vfpcc <2 x i64> @xor_int64_1(<2 x i64> %a) { +; CHECKLE-LABEL: xor_int64_1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: adr r0, .LCPI51_0 +; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; CHECKLE-NEXT: .p2align 4 +; CHECKLE-NEXT: @ %bb.1: +; CHECKLE-NEXT: .LCPI51_0: +; CHECKLE-NEXT: .long 1 @ 0x1 +; CHECKLE-NEXT: .long 0 @ 0x0 +; CHECKLE-NEXT: .long 1 @ 0x1 +; CHECKLE-NEXT: .long 0 @ 0x0 +; +; CHECKBE-LABEL: xor_int64_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: adr r0, .LCPI51_0 +; CHECKBE-NEXT: vldrb.u8 q1, [r0] +; CHECKBE-NEXT: vrev64.8 q2, q1 +; CHECKBE-NEXT: veor q0, q0, q2 +; CHECKBE-NEXT: bx lr +; CHECKBE-NEXT: .p2align 4 +; CHECKBE-NEXT: @ %bb.1: +; CHECKBE-NEXT: .LCPI51_0: +; CHECKBE-NEXT: .long 0 @ 0x0 +; CHECKBE-NEXT: .long 1 @ 0x1 +; CHECKBE-NEXT: .long 0 @ 0x0 +; CHECKBE-NEXT: .long 1 @ 0x1 +entry: + %b = xor <2 x i64> %a, + ret <2 x i64> %b +} + define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() { ; CHECK-LABEL: mov_int64_ff: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i64 q0, #0xff ; CHECK-NEXT: bx lr entry: - ret <2 x i64> < i64 255, i64 255 > + ret <2 x i64> +} + +define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff(<2 x i64> %a) { +; CHECK-LABEL: xor_int64_ff: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i64 q1, #0xff +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %b = xor <2 x i64> %a, + ret <2 x i64> %b } define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() { @@ -247,7 +857,23 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() { ; CHECK-NEXT: vmov.i8 q0, #0xff ; CHECK-NEXT: bx lr entry: - ret <2 x i64> < i64 -1, i64 -1 > + ret <2 x i64> +} + +define arm_aapcs_vfpcc <2 x i64> @xor_int64_m1(<2 x i64> %a) { +; CHECKLE-LABEL: xor_int64_m1: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmvn q0, q0 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_m1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i8 q1, #0xff +; CHECKBE-NEXT: veor q0, q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <2 x i64> %a, + ret 
<2 x i64> %b } define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() { @@ -256,18 +882,29 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() { ; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff ; CHECK-NEXT: bx lr entry: - ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > + ret <2 x i64> +} + +define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff0000ff0000ffff(<2 x i64> %a) { +; CHECK-LABEL: xor_int64_ff0000ff0000ffff: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i64 q1, #0xff0000ff0000ffff +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %b = xor <2 x i64> %a, + ret <2 x i64> %b } define arm_aapcs_vfpcc <2 x i64> @mov_int64_f_0() { ; CHECKLE-LABEL: mov_int64_f_0: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI24_0 +; CHECKLE-NEXT: adr r0, .LCPI58_0 ; CHECKLE-NEXT: vldrw.u32 q0, [r0] ; CHECKLE-NEXT: bx lr ; CHECKLE-NEXT: .p2align 4 ; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI24_0: +; CHECKLE-NEXT: .LCPI58_0: ; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321 ; CHECKLE-NEXT: .long 0 ; CHECKLE-NEXT: .long 0 @ double 0 @@ -275,19 +912,53 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_f_0() { ; ; CHECKBE-LABEL: mov_int64_f_0: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI24_0 +; CHECKBE-NEXT: adr r0, .LCPI58_0 ; CHECKBE-NEXT: vldrb.u8 q1, [r0] ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr ; CHECKBE-NEXT: .p2align 4 ; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI24_0: +; CHECKBE-NEXT: .LCPI58_0: ; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321 ; CHECKBE-NEXT: .long 255 ; CHECKBE-NEXT: .long 0 @ double 0 ; CHECKBE-NEXT: .long 0 entry: - ret <2 x i64> < i64 255, i64 0 > + ret <2 x i64> +} + +define arm_aapcs_vfpcc <2 x i64> @xor_int64_f_0(<2 x i64> %a) { +; CHECKLE-LABEL: xor_int64_f_0: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: adr r0, .LCPI59_0 +; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; CHECKLE-NEXT: .p2align 4 +; CHECKLE-NEXT: @ %bb.1: +; CHECKLE-NEXT: .LCPI59_0: +; CHECKLE-NEXT: .long 255 @ 0xff +; CHECKLE-NEXT: .long 0 @ 0x0 +; CHECKLE-NEXT: .long 0 @ 0x0 +; CHECKLE-NEXT: .long 0 @ 0x0 +; +; CHECKBE-LABEL: xor_int64_f_0: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: adr r0, .LCPI59_0 +; CHECKBE-NEXT: vldrb.u8 q1, [r0] +; CHECKBE-NEXT: vrev64.8 q2, q1 +; CHECKBE-NEXT: veor q0, q0, q2 +; CHECKBE-NEXT: bx lr +; CHECKBE-NEXT: .p2align 4 +; CHECKBE-NEXT: @ %bb.1: +; CHECKBE-NEXT: .LCPI59_0: +; CHECKBE-NEXT: .long 0 @ 0x0 +; CHECKBE-NEXT: .long 255 @ 0xff +; CHECKBE-NEXT: .long 0 @ 0x0 +; CHECKBE-NEXT: .long 0 @ 0x0 +entry: + %b = xor <2 x i64> %a, + ret <2 x i64> %b } define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() { @@ -304,6 +975,26 @@ entry: ret <16 x i8> } +define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f000f0f(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int64_0f000f0f: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xff000000ff00ff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_0f000f0f: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00 +; CHECKBE-NEXT: vrev64.8 q2, q1 +; CHECKBE-NEXT: vrev64.8 q1, q0 +; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() { ; CHECKLE-LABEL: mov_int64_ff00ffff: ; CHECKLE: @ %bb.0: @ %entry @@ -318,6 +1009,26 @@ entry: ret <8 x i16> } +define arm_aapcs_vfpcc <8 x i16> 
@xor_int64_ff00ffff(<8 x i16> %a) { +; CHECKLE-LABEL: xor_int64_ff00ffff: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xffffffff0000ffff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_ff00ffff: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff +; CHECKBE-NEXT: vrev64.16 q2, q1 +; CHECKBE-NEXT: vrev64.16 q1, q0 +; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <8 x i16> %a, + ret <8 x i16> %b +} + define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f0f0f0f0f0f0f0f() { ; CHECKLE-LABEL: mov_int64_0f0f0f0f0f0f0f0f: ; CHECKLE: @ %bb.0: @ %entry @@ -332,6 +1043,27 @@ entry: ret <16 x i8> } +; FIXME: This is incorrect for BE +define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f0f0f0f0f0f0f0f(<16 x i8> %a) { +; CHECKLE-LABEL: xor_int64_0f0f0f0f0f0f0f0f: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i16 q1, #0xff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_0f0f0f0f0f0f0f0f: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i16 q1, #0xff +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: vrev16.8 q1, q1 +; CHECKBE-NEXT: veor q1, q2, q1 +; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = xor <16 x i8> %a, + ret <16 x i8> %b +} + define arm_aapcs_vfpcc <4 x float> @mov_float_1() { ; CHECK-LABEL: mov_float_1: ; CHECK: @ %bb.0: @ %entry @@ -342,6 +1074,19 @@ entry: ret <4 x float> } +define arm_aapcs_vfpcc <4 x float> @fadd_float_1(<4 x float> %a) { +; CHECKBE-LABEL: fadd_float_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.f32 q1, #1.000000e+00 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: vadd.f32 q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + define arm_aapcs_vfpcc <4 x float> @mov_float_m3() { ; CHECK-LABEL: mov_float_m3: ; CHECK: @ %bb.0: @ %entry @@ -353,35 +1098,72 @@ entry: ret <4 x float> } +define arm_aapcs_vfpcc <4 x float> @fadd_float_m3(<4 x float> %a) { +; CHECKBE-LABEL: fadd_float_m3: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.f32 q1, #-3.000000e+00 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: vadd.f32 q1, q2, q1 +; CHECKBE-NEXT: vrev64.32 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + define arm_aapcs_vfpcc <8 x half> @mov_float16_1() { ; CHECK-LABEL: mov_float16_1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0x3c00 ; CHECK-NEXT: bx lr - entry: ret <8 x half> } +define arm_aapcs_vfpcc <8 x half> @fadd_float16_1(<8 x half> %a) { +; CHECKBE-LABEL: fadd_float16_1: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i16 q1, #0x3c00 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: vadd.f16 q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = fadd <8 x half> %a, + ret <8 x half> %b +} + define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() { ; CHECK-LABEL: mov_float16_m3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0xc200 ; CHECK-NEXT: bx lr - entry: ret <8 x half> } +define arm_aapcs_vfpcc <8 x half> @fadd_float16_m3(<8 x half> %a) { +; CHECKBE-LABEL: fadd_float16_m3: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i16 q1, #0xc200 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: vadd.f16 q1, q2, q1 +; CHECKBE-NEXT: vrev64.16 q0, q1 +; CHECKBE-NEXT: bx lr +entry: + %b = fadd <8 x half> %a, + ret <8 x half> %b +} + define arm_aapcs_vfpcc <2 x double> @mov_double_1() { ; CHECKLE-LABEL: 
mov_double_1: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI32_0 +; CHECKLE-NEXT: adr r0, .LCPI74_0 ; CHECKLE-NEXT: vldrw.u32 q0, [r0] ; CHECKLE-NEXT: bx lr ; CHECKLE-NEXT: .p2align 4 ; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI32_0: +; CHECKLE-NEXT: .LCPI74_0: ; CHECKLE-NEXT: .long 0 @ double 1 ; CHECKLE-NEXT: .long 1072693248 ; CHECKLE-NEXT: .long 0 @ double 1 @@ -389,13 +1171,13 @@ define arm_aapcs_vfpcc <2 x double> @mov_double_1() { ; ; CHECKBE-LABEL: mov_double_1: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI32_0 +; CHECKBE-NEXT: adr r0, .LCPI74_0 ; CHECKBE-NEXT: vldrb.u8 q1, [r0] ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr ; CHECKBE-NEXT: .p2align 4 ; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI32_0: +; CHECKBE-NEXT: .LCPI74_0: ; CHECKBE-NEXT: .long 1072693248 @ double 1 ; CHECKBE-NEXT: .long 0 ; CHECKBE-NEXT: .long 1072693248 @ double 1 From 43b88851cefe68645aa59b1fccc8390a8a31f469 Mon Sep 17 00:00:00 2001 From: Max Winkler Date: Sat, 24 Aug 2024 12:25:46 -0700 Subject: [PATCH 2/3] [clang-cl] [AST] Reapply #102848 Fix placeholder return type name mangling for MSVC 1920+ / VS2019+ (#104722)

Reapply https://github.com/llvm/llvm-project/pull/102848. The description in this PR details the changes relative to the reverted original PR above.

For `auto&&` return types that can take part in reference collapsing, we were not properly handling the mangling that can arise. When collapsing occurs, an inner reference is created with the collapsed reference type. If we return `int&` from such a function, an inner reference of `int&` is created within the `auto&&` return type. `getPointeeType` on a reference type walks through all inner references before returning the pointee type, which ends up being a builtin type, `int`; this is unexpected.

We can use `getPointeeTypeAsWritten` to get the `AutoType` as expected; however, for the instantiated template declaration, reference collapsing has already occurred on the return type. This means `auto&&` is turned into `auto&` in the example above, so we end up mangling an lvalue reference type. This is unintended, as MSVC mangles based on the declared return type, `auto&&` in this case, which is treated as an rvalue reference.

```
template <class T>
auto&& AutoReferenceCollapseT(int& x) { return static_cast<int&>(x); }

void test() {
  int x = 1;
  auto&& rref = AutoReferenceCollapseT<void>(x); // "??$AutoReferenceCollapseT@X@@YA$$QEA_PAEAH@Z"
                                                 // Mangled as an rvalue reference to auto
}
```

If we are mangling a template with a placeholder return type, we want to get the first template declaration and use its return type to mangle any instantiations. This fixes the bug reported in the original PR that caused the revert with libcxx `std::variant`. I also tested locally with libcxx; a short sketch of the reference manglings exercised by the new test follows, and after it is the libcxx test case that fails with the original PR but now works with this one.
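A minimal sketch of the two reference flavours, with assumed template-parameter spelling and cast; the mangled names in the comments are the ones the new `clang/test/CodeGenCXX/mangle-ms-auto-return.cpp` test expects for `T = int` (`_P` encodes `auto`, `AEA` an lvalue reference to it, `$$QEA` an rvalue reference):

```cpp
// Illustrative sketch; the expected manglings are copied from the new test.
template <class T>
auto& RefAutoT(T& x) { return x; }
// RefAutoT<int> mangles as "??$RefAutoT@H@@YAAEA_PAEAH@Z"
//   return type "AEA_P": lvalue reference to auto

template <class T>
auto&& RRefAutoT(T& x) { return static_cast<T&&>(x); }
// RRefAutoT<int> mangles as "??$RRefAutoT@H@@YA$$QEA_PAEAH@Z"
//   return type "$$QEA_P": rvalue reference to auto
```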
```
#include <variant>

void test() {
  std::variant<int> v{ 1 };
  int& r = std::get<0>(v);
  (void)r;
}
```
--- clang/docs/ReleaseNotes.rst | 2 + clang/lib/AST/MicrosoftMangle.cpp | 170 +++++++- .../test/CodeGenCXX/mangle-ms-auto-return.cpp | 383 ++++++++++++++++++ .../mangle-ms-auto-templates-memptrs.cpp | 12 +- .../mangle-ms-auto-templates-nullptr.cpp | 2 +- .../CodeGenCXX/mangle-ms-auto-templates.cpp | 6 +- 6 files changed, 556 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGenCXX/mangle-ms-auto-return.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0ced2f779f7058..6e1db41a55cbe0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -77,6 +77,8 @@ C++ Specific Potentially Breaking Changes ABI Changes in This Version --------------------------- +- Fixed Microsoft name mangling of placeholder, auto and decltype(auto), return types for MSVC 1920+. This change resolves incompatibilities with code compiled by MSVC 1920+ but will introduce incompatibilities with code compiled by earlier versions of Clang unless such code is built with the compiler option -fms-compatibility-version=19.14 to imitate the MSVC 1914 mangling behavior. + AST Dumping Potentially Breaking Changes ---------------------------------------- diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index ed8d1cf1b98dd8..b539681984ef7c 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -408,6 +408,8 @@ class MicrosoftCXXNameMangler { void mangleSourceName(StringRef Name); void mangleNestedName(GlobalDecl GD); + void mangleAutoReturnType(QualType T, QualifierMangleMode QMM); + private:
"_T" : "_P"); + return; + } + + T = T.getDesugaredType(getASTContext()); + Qualifiers Quals = T.getLocalQualifiers(); + + switch (QMM) { + case QMM_Drop: + case QMM_Result: + break; + case QMM_Mangle: + mangleQualifiers(Quals, false); + break; + default: + llvm_unreachable("QMM_Escape unexpected"); + } + + const Type *ty = T.getTypePtr(); + switch (ty->getTypeClass()) { + case Type::MemberPointer: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::Pointer: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::LValueReference: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::RValueReference: + mangleAutoReturnType(cast(ty), Quals); + break; + default: + llvm_unreachable("Invalid type expected"); + } +} + void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range, QualifierMangleMode QMM) { // Don't use the canonical types. MSVC includes things like 'const' on @@ -2907,17 +2965,60 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // can differ by their calling convention and are typically deduced. So // we make sure that this type gets mangled properly. mangleType(ResultType, Range, QMM_Result); - } else if (const auto *AT = dyn_cast_or_null( - ResultType->getContainedAutoType())) { - Out << '?'; - mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); - Out << '?'; + } else if (IsInLambda) { + if (const auto *AT = ResultType->getContainedAutoType()) { + assert(AT->getKeyword() == AutoTypeKeyword::Auto && + "should only need to mangle auto!"); + (void)AT; + Out << '?'; + mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); + Out << '?'; + mangleSourceName(""); + Out << '@'; + } else { + Out << '@'; + } + } else if (const auto *AT = ResultType->getContainedAutoType()) { assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType && "shouldn't need to mangle __auto_type!"); - mangleSourceName(AT->isDecltypeAuto() ? "" : ""); - Out << '@'; - } else if (IsInLambda) { - Out << '@'; + + // If we have any pointer types with the clang address space extension + // then defer to the custom clang mangling to keep backwards + // compatibility. See `mangleType(const PointerType *T, Qualifiers Quals, + // SourceRange Range)` for details. + auto UseClangMangling = [](QualType ResultType) { + QualType T = ResultType; + while (isa(T.getTypePtr())) { + T = T->getPointeeType(); + if (T.getQualifiers().hasAddressSpace()) + return true; + } + return false; + }; + + if (getASTContext().getLangOpts().isCompatibleWithMSVC( + LangOptions::MSVC2019) && + !UseClangMangling(ResultType)) { + if (D && !D->getPrimaryTemplate()) { + Out << '@'; + } else { + if (D && D->getPrimaryTemplate()) { + const FunctionProtoType *FPT = D->getPrimaryTemplate() + ->getTemplatedDecl() + ->getFirstDecl() + ->getType() + ->castAs(); + ResultType = FPT->getReturnType(); + } + mangleAutoReturnType(ResultType, QMM_Result); + } + } else { + Out << '?'; + mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); + Out << '?'; + mangleSourceName(AT->isDecltypeAuto() ? 
"" : ""); + Out << '@'; + } } else { if (ResultType->isVoidType()) ResultType = ResultType.getUnqualifiedType(); @@ -4220,6 +4321,57 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, Mangler.getStream() << '@'; } +void MicrosoftCXXNameMangler::mangleAutoReturnType(const MemberPointerType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + manglePointerCVQualifiers(Quals); + manglePointerExtQualifiers(Quals, PointeeType); + if (const FunctionProtoType *FPT = PointeeType->getAs()) { + Out << '8'; + mangleName(T->getClass()->castAs()->getDecl()); + mangleFunctionType(FPT, nullptr, true); + } else { + mangleQualifiers(PointeeType.getQualifiers(), true); + mangleName(T->getClass()->castAs()->getDecl()); + mangleAutoReturnType(PointeeType, QMM_Drop); + } +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const PointerType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!PointeeType.getQualifiers().hasAddressSpace() && + "Unexpected address space mangling required"); + + manglePointerCVQualifiers(Quals); + manglePointerExtQualifiers(Quals, PointeeType); + + if (const FunctionProtoType *FPT = PointeeType->getAs()) { + Out << '6'; + mangleFunctionType(FPT); + } else { + mangleAutoReturnType(PointeeType, QMM_Mangle); + } +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const LValueReferenceType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); + Out << 'A'; + manglePointerExtQualifiers(Quals, PointeeType); + mangleAutoReturnType(PointeeType, QMM_Mangle); +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const RValueReferenceType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); + Out << "$$Q"; + manglePointerExtQualifiers(Quals, PointeeType); + mangleAutoReturnType(PointeeType, QMM_Mangle); +} + MicrosoftMangleContext *MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags, bool IsAux) { diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp new file mode 100644 index 00000000000000..5b18dcc0820ee6 --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp @@ -0,0 +1,383 @@ +// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck %s + +struct StructA {}; + +template +auto AutoT() { return T(); } + +template +const auto AutoConstT() { return T(); } + +template +volatile auto AutoVolatileT() { return T(); } + +template +const volatile auto AutoConstVolatileT() { return T(); } + +// The qualifiers of the return type should always be emitted even for void types. +// Void types usually have their qualifers stripped in the mangled name for MSVC ABI. 
+void test_template_auto_void() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@X@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBX@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CCX@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CDX@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@X@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@X@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@X@@YA?D_PXZ" +} + +void test_template_auto_int() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@H@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CCH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CDH@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@H@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@H@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@H@@YA?D_PXZ" +} + +void test_template_auto_struct() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@UStructA@@@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBUStructA@@@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@UStructA@@@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@UStructA@@@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@UStructA@@@@YA?D_PXZ" +} + +void test_template_auto_ptr() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@PEAH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@PEBH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@QEBH@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@PEAH@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@PEAH@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@PEAH@@YA?D_PXZ" +} + +template +auto* PtrAutoT() { return T(); } + +template +const auto* PtrAutoConstT() { return T(); } + +template +volatile auto* PtrAutoVolatileT() { return T(); } + +template +const volatile auto* PtrAutoConstVolatileT() { return T(); } + +void test_template_ptr_auto() { + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@PEAH@@YAPEA_PXZ" + + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@PEBH@@YAPEA_PXZ" + + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@QEBH@@YAPEA_PXZ" + + PtrAutoConstT(); + // CHECK: call {{.*}} @"??$PtrAutoConstT@PEAH@@YAPEB_PXZ" + + PtrAutoVolatileT(); + // CHECK: call {{.*}} @"??$PtrAutoVolatileT@PEAH@@YAPEC_PXZ" + + PtrAutoConstVolatileT(); + // CHECK: call {{.*}} @"??$PtrAutoConstVolatileT@PEAH@@YAPED_PXZ" +} + +int func_int(); +const int func_constint(); +void func_void(); +int* func_intptr(); + +template +auto (*FuncPtrAutoT())() { return v; } + +void test_template_func_ptr_auto() { + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AHXZ$1?func_int@@YAHXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6A?BHXZ$1?func_constint@@YA?BHXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AXXZ$1?func_void@@YAXXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6APEAHXZ$1?func_intptr@@YAPEAHXZ@@YAP6A?A_PXZXZ" +} + +template +auto& RefAutoT(T& x) { return x; } + +template +const auto& ConstRefAutoT(T& x) { return x; } + +template +auto&& RRefAutoT(T& x) { return static_cast(x); } + +void test_template_ref_auto() { + int x; + + 
RefAutoT(x); + // CHECK: call {{.*}} @"??$RefAutoT@H@@YAAEA_PAEAH@Z" + + ConstRefAutoT(x); + // CHECK: call {{.*}} @"??$ConstRefAutoT@H@@YAAEB_PAEAH@Z" + + RRefAutoT(x); + // CHECK: call {{.*}} @"??$RRefAutoT@H@@YA$$QEA_PAEAH@Z" +} + +template +decltype(auto) DecltypeAutoT() { return T(); } + +template +decltype(auto) DecltypeAutoT2(T& x) { return static_cast(x); } + +void test_template_decltypeauto() { + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@X@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@H@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBH@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCH@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDH@@YA?A_TXZ" + + int x; + + DecltypeAutoT2(x); + // CHECK: call {{.*}} @"??$DecltypeAutoT2@H@@YA?A_TAEAH@Z" +} + +// Still want to use clang's custom mangling for lambdas to keep backwards compatibility until +// MSVC lambda name mangling has been deciphered. +void test_lambda() { + auto lambdaIntRetAuto = []() { return 0; }; + lambdaIntRetAuto(); + // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA?A?@@XZ" + + auto lambdaIntRet = []() -> int { return 0; }; + lambdaIntRet(); + // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA@XZ" + + auto lambdaGenericIntIntRetAuto = [](auto a) { return a; }; + lambdaGenericIntIntRetAuto(0); + // CHECK: call {{.*}} @"??$?RH@@?0??test_lambda@@YAXXZ@QEBA?A?@@H@Z" +} + +auto TestTrailingInt() -> int { + return 0; +} + +auto TestTrailingConstVolatileVoid() -> const volatile void { +} + +auto TestTrailingStructA() -> StructA { + return StructA{}; +} + +void test_trailing_return() { + TestTrailingInt(); + // CHECK: call {{.*}} @"?TestTrailingInt@@YAHXZ" + + TestTrailingConstVolatileVoid(); + // CHECK: call {{.*}} @"?TestTrailingConstVolatileVoid@@YAXXZ" + + TestTrailingStructA(); + // CHECK: call {{.*}} @"?TestTrailingStructA@@YA?AUStructA@@XZ" +} + +auto TestNonTemplateAutoInt() { + return 0; +} + +auto TestNonTemplateAutoVoid() { + return; +} + +auto TestNonTemplateAutoStructA() { + return StructA{}; +} + +const auto TestNonTemplateConstAutoInt() { + return 0; +} + +const auto TestNonTemplateConstAutoVoid() { + return; +} + +const auto TestNonTemplateConstAutoStructA() { + return StructA{}; +} + +void test_nontemplate_auto() { + TestNonTemplateAutoInt(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoInt@@YA@XZ" + + TestNonTemplateAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoVoid@@YA@XZ" + + TestNonTemplateAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoStructA@@YA@XZ" + + TestNonTemplateConstAutoInt(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoInt@@YA@XZ" + + TestNonTemplateConstAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoVoid@@YA@XZ" + + TestNonTemplateConstAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoStructA@@YA@XZ" +} + +decltype(auto) TestNonTemplateDecltypeAutoInt() { + return 0; +} + +decltype(auto) TestNonTemplateDecltypeAutoVoid() { + return; +} + +decltype(auto) TestNonTemplateDecltypeAutoStructA() { + return StructA{}; +} + +void test_nontemplate_decltypeauto() { + TestNonTemplateDecltypeAutoInt(); + // CHECK: call {{.*}} 
@"?TestNonTemplateDecltypeAutoInt@@YA@XZ" + + TestNonTemplateDecltypeAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoVoid@@YA@XZ" + + TestNonTemplateDecltypeAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoStructA@@YA@XZ" +} + +struct StructB { + int x; +}; + +template +auto StructB::* AutoMemberDataPtrT(T x) { return x; } + +template +const auto StructB::* AutoConstMemberDataPtrT(T x) { return x; } + +void test_template_auto_member_data_ptr() { + AutoMemberDataPtrT(&StructB::x); + // CHECK: call {{.*}} @"??$AutoMemberDataPtrT@PEQStructB@@H@@YAPEQStructB@@_PPEQ0@H@Z" + + AutoConstMemberDataPtrT(&StructB::x); + // CHECK: call {{.*}} @"??$AutoConstMemberDataPtrT@PEQStructB@@H@@YAPERStructB@@_PPEQ0@H@Z" +} + +struct StructC { + void test() {} +}; + +struct StructD { + const int test() { return 0; } +}; + +template +auto (StructC::*AutoMemberFuncPtrT(T x))() { return x; } + +template +const auto (StructD::*AutoConstMemberFuncPtrT(T x))() { return x; } + +void test_template_auto_member_func_ptr() { + AutoMemberFuncPtrT(&StructC::test); + // CHECK: call {{.*}} @"??$AutoMemberFuncPtrT@P8StructC@@EAAXXZ@@YAP8StructC@@EAA?A_PXZP80@EAAXXZ@Z" + + AutoConstMemberFuncPtrT(&StructD::test); + // CHECK: call {{.*}} @"??$AutoConstMemberFuncPtrT@P8StructD@@EAA?BHXZ@@YAP8StructD@@EAA?B_PXZP80@EAA?BHXZ@Z" +} + +template +auto * __attribute__((address_space(1))) * AutoPtrAddressSpaceT() { + T * __attribute__((address_space(1))) * p = nullptr; + return p; +} + +void test_template_auto_address_space_ptr() { + AutoPtrAddressSpaceT(); + // CHECK: call {{.*}} @"??$AutoPtrAddressSpaceT@H@@YA?A?@@XZ" +} + +template +auto&& AutoReferenceCollapseT(T& x) { return static_cast(x); } + +auto&& AutoReferenceCollapse(int& x) { return static_cast(x); } + +void test2() { + int x = 1; + auto&& rref0 = AutoReferenceCollapseT(x); + // CHECK: call {{.*}} @"??$AutoReferenceCollapseT@H@@YA$$QEA_PAEAH@Z" + + auto&& rref1 = AutoReferenceCollapse(x); + // CHECK: call {{.*}} @"?AutoReferenceCollapse@@YA@AEAH@Z" +} diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp index 360ebdecc5562b..b7bc3953f0b438 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp @@ -34,15 +34,15 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$I?f@V@@QEAAXXZA@A@@@QEAA@XZ" AutoFunc<&S::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?f@S@@QEAAXXZ@@YA?A?@@XZ" AutoFunc<&M::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$H?f@M@@QEAAXXZA@@@YA?A?@@XZ" AutoFunc<&V::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$I?f@V@@QEAAXXZA@A@@@YA?A?@@XZ" AutoParmTemplate<&S::a> auto_data_single_inheritance; @@ -58,14 +58,14 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$FBA@A@@@QEAA@XZ" AutoFunc<&S::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A_PXZ" // BEFORE: call {{.*}} 
@"??$AutoFunc@$07@@YA?A?@@XZ" AutoFunc<&M::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0M@@@YA?A?@@XZ" AutoFunc<&V::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$FBA@A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp index 8f98c1e59f73d7..251d9219c01ce2 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp @@ -19,6 +19,6 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ" AutoFunc(); - // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp index ff5395cea75eb7..effcc31ee31103 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp @@ -26,7 +26,7 @@ int j; void template_mangling() { AutoFunc<1>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?@@XZ" AutoParmTemplate<0> auto_int; // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ" @@ -52,7 +52,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ" AutoFunc<&i>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?i@@3HA@@YA?A?@@XZ" AutoParmTemplate<&i> auto_int_ptr; @@ -64,7 +64,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$1?i@@3HA$1?j@@3HA@@QEAA@XZ" AutoFunc<&Func>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?Func@@YAHXZ@@YA?A?@@XZ" AutoParmTemplate<&Func> auto_func_ptr; From 77fccb35ac08f66d52bb152735e27572bf9f3f93 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Sun, 25 Aug 2024 04:30:40 +0900 Subject: [PATCH 3/3] [AArch64] Replace AND with LSL#2 for LDR target (#34101) (#89531) Currently, process of replacing bitwise operations consisting of `LSR`/`LSL` with `And` is performed by `DAGCombiner`. However, in certain cases, the `AND` generated by this process can be removed. Consider following case: ``` lsr x8, x8, #56 and x8, x8, #0xfc ldr w0, [x2, x8] ret ``` In this case, we can remove the `AND` by changing the target of `LDR` to `[X2, X8, LSL #2]` and right-shifting amount change to 56 to 58. after changed: ``` lsr x8, x8, #58 ldr w0, [x2, x8, lsl #2] ret ``` This patch checks to see if the `SHIFTING` + `AND` operation on load target can be optimized and optimizes it if it can. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 17 +++ llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll | 138 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8c2f85657ff87e..5ac5b7f8a5ab18 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18023,6 +18023,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue()); } + // We do not need to fold when this shifting used in specific load case: + // (ldr x, (add x, (shl (srl x, c1) 2))) + if (N->getOpcode() == ISD::SHL && N->hasOneUse()) { + if (auto C2 = dyn_cast(N->getOperand(1))) { + unsigned ShlAmt = C2->getZExtValue(); + if (auto ShouldADD = *N->use_begin(); + ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) { + if (auto ShouldLOAD = dyn_cast(*ShouldADD->use_begin())) { + unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8; + if ((1ULL << ShlAmt) == ByteVT && + isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT())) + return false; + } + } + } + } + return true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll new file mode 100644 index 00000000000000..9dfc8df703ce64 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s +; + +define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load16_shr63: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #63 +; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 63 + %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr + %0 = load i16, ptr %arrayidx, align 2 + ret i16 %0 +} + +define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load16_shr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #2 +; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 2 + %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr + %0 = load i16, ptr %arrayidx, align 2 + ret i16 %0 +} + +define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load16_shr1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 1 + %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr + %0 = load i16, ptr %arrayidx, align 2 + ret i16 %0 +} + +define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load32_shr63: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #63 +; CHECK-NEXT: ldr w0, [x2, x8, lsl #2] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 63 + %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load32_shr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #2 +; CHECK-NEXT: ldr w0, [x2, x8, lsl 
#2] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 2 + %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load32_shr1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: ldr w0, [x2, x8, lsl #2] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 1 + %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr + %0 = load i32, ptr %arrayidx, align 4 + ret i32 %0 +} + +define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load64_shr63: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #63 +; CHECK-NEXT: ldr x0, [x2, x8, lsl #3] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 63 + %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load64_shr2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #2 +; CHECK-NEXT: ldr x0, [x2, x8, lsl #3] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 2 + %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) { +; CHECK-LABEL: load64_shr1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul x8, x1, x0 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: ldr x0, [x2, x8, lsl #3] +; CHECK-NEXT: ret +entry: + %mul = mul i64 %b, %a + %shr = lshr i64 %mul, 1 + %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +}