From a15144f2baed59bd72df6c35f7cbe5547b84a008 Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Mon, 28 Aug 2023 19:41:35 +0200 Subject: [PATCH] [AArch64][GlobalISel] Lower G_EXTRACT_VECTOR_ELT with variable indices G_EXTRACT_VECTOR_ELT instructions with non-constant indices are not selected, so they need to be lowered. Fixes https://github.com/llvm/llvm-project/issues/65049. Reviewed By: Peter Differential Revision: https://reviews.llvm.org/D159096 --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 18 +- .../AArch64/GISel/AArch64LegalizerInfo.h | 2 + .../legalize-extract-vector-elt.mir | 281 +++++++++++++- .../AArch64/arm64-extract-insert-varidx.ll | 365 ++++++++++++++++++ 4 files changed, 643 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 17b5490a914b27..e2df8fb1321df8 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -696,12 +696,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return Query.Types[0] != EltTy; }) .minScalar(2, s64) - .legalIf([=](const LegalityQuery &Query) { + .customIf([=](const LegalityQuery &Query) { const LLT &VecTy = Query.Types[1]; return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || - VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 || - VecTy == v2p0; + VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0; }) .minScalarOrEltIf( [=](const LegalityQuery &Query) { @@ -1022,6 +1021,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeMemOps(MI, Helper); case TargetOpcode::G_FCOPYSIGN: return legalizeFCopySign(MI, Helper); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return legalizeExtractVectorElt(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); @@ -1801,3 +1802,14 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI, MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeExtractVectorElt( + MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { + assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); + auto VRegAndVal = + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (VRegAndVal) + return true; + return Helper.lowerExtractInsertVectorElt(MI) != + LegalizerHelper::LegalizeResult::UnableToLegalize; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 13a68b83c54ee1..853d5a2305ac68 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -62,6 +62,8 @@ class AArch64LegalizerInfo : public LegalizerInfo { bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir index 22c2a479363762..2209287284b711 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir @@ -22,18 +22,18 @@ body: | name: test_eve_v2s1 body: | bb.0: - liveins: $q0, $q1, $x0 + liveins: $q0, $q1 ; CHECK-LABEL: name: test_eve_v2s1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(eq), [[COPY]](<2 x s64>), [[COPY1]] - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[ICMP]](<2 x s64>), [[COPY2]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[ICMP]](<2 x s64>), [[C]](s64) ; CHECK-NEXT: $x0 = COPY [[EVEC]](s64) ; CHECK-NEXT: RET_ReallyLR %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = COPY $q1 - %2:_(s64) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 1 %3:_(<2 x s1>) = G_ICMP intpred(eq), %0(<2 x s64>), %1 %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s1>), %2:_(s64) %5:_(s64) = G_ANYEXT %4(s1) @@ -44,19 +44,19 @@ body: | name: test_eve_v4s1 body: | bb.0: - liveins: $q0, $q1, $x0 + liveins: $q0, $q1 ; CHECK-LABEL: name: test_eve_v4s1 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY1]] - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[COPY2]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EVEC]](s32) ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 - %2:_(s64) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 1 %3:_(<4 x s1>) = G_ICMP intpred(eq), %0(<4 x s32>), %1 %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<4 x s1>), %2:_(s64) %5:_(s64) = G_ANYEXT %4(s1) @@ -67,19 +67,19 @@ body: | name: test_eve_v8s1 body: | bb.0: - liveins: $q0, $q1, $x0 + liveins: $q0, $q1 ; CHECK-LABEL: name: test_eve_v8s1 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s16>) = G_ICMP intpred(eq), [[COPY]](<8 x s16>), [[COPY1]] - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ICMP]](<8 x s16>), [[COPY2]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ICMP]](<8 x s16>), [[C]](s64) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EVEC]](s16) ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: RET_ReallyLR %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = COPY $q1 - %2:_(s64) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 1 %3:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s16>), %1 %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<8 x s1>), %2:_(s64) %5:_(s64) = G_ANYEXT %4(s1) @@ -90,19 +90,19 @@ body: | name: test_eve_v16s1 body: | bb.0: - liveins: $q0, $q1, $x0 + liveins: $q0, $q1 ; CHECK-LABEL: name: test_eve_v16s1 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]] - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[ICMP]](<16 x s8>), [[COPY2]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[ICMP]](<16 x s8>), [[C]](s64) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EVEC]](s8) ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) ; CHECK-NEXT: RET_ReallyLR %0:_(<16 x s8>) = COPY $q0 %1:_(<16 x s8>) = COPY $q1 - %2:_(s64) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 1 %3:_(<16 x s1>) = G_ICMP intpred(eq), %0(<16 x s8>), %1 %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<16 x s1>), %2:_(s64) %5:_(s64) = G_ANYEXT %4(s1) @@ -113,15 +113,15 @@ body: | name: test_eve_v2p0 body: | bb.0: - liveins: $q0, $q1, $x0 + liveins: $q0, $q1 ; CHECK-LABEL: name: test_eve_v2p0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(p0) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x p0>), [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(p0) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x p0>), [[C]](s64) ; CHECK-NEXT: $x0 = COPY [[EVEC]](p0) ; CHECK-NEXT: RET_ReallyLR %0:_(<2 x p0>) = COPY $q0 - %2:_(s64) = COPY $x0 + %2:_(s64) = G_CONSTANT i64 1 %3:_(p0) = G_EXTRACT_VECTOR_ELT %0:_(<2 x p0>), %2:_(s64) $x0 = COPY %3(p0) RET_ReallyLR @@ -147,6 +147,150 @@ body: | RET_ReallyLR ... --- +name: test_eve_v2s1_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: test_eve_v2s1_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(eq), [[COPY]](<2 x s64>), [[COPY1]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[ICMP]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (<2 x s64>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C1]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64)) + ; CHECK-NEXT: $x0 = COPY [[LOAD]](s64) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(s64) = COPY $x0 + %3:_(<2 x s1>) = G_ICMP intpred(eq), %0(<2 x s64>), %1 + %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s1>), %2:_(s64) + %5:_(s64) = G_ANYEXT %4(s1) + $x0 = COPY %5(s64) + RET_ReallyLR +... +--- +name: test_eve_v4s1_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: test_eve_v4s1_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY1]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[ICMP]](<4 x s32>), [[FRAME_INDEX]](p0) :: (store (<4 x s32>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C1]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = COPY $x0 + %3:_(<4 x s1>) = G_ICMP intpred(eq), %0(<4 x s32>), %1 + %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<4 x s1>), %2:_(s64) + %5:_(s64) = G_ANYEXT %4(s1) + $x0 = COPY %5(s64) + RET_ReallyLR +... +--- +name: test_eve_v8s1_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: test_eve_v8s1_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s16>) = G_ICMP intpred(eq), [[COPY]](<8 x s16>), [[COPY1]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[ICMP]](<8 x s16>), [[FRAME_INDEX]](p0) :: (store (<8 x s16>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C1]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16)) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s16>) = COPY $q1 + %2:_(s64) = COPY $x0 + %3:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s16>), %1 + %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<8 x s1>), %2:_(s64) + %5:_(s64) = G_ANYEXT %4(s1) + $x0 = COPY %5(s64) + RET_ReallyLR +... +--- +name: test_eve_v16s1_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: test_eve_v16s1_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[ICMP]](<16 x s8>), [[FRAME_INDEX]](p0) :: (store (<16 x s8>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C1]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8)) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(s64) = COPY $x0 + %3:_(<16 x s1>) = G_ICMP intpred(eq), %0(<16 x s8>), %1 + %4:_(s1) = G_EXTRACT_VECTOR_ELT %3:_(<16 x s1>), %2:_(s64) + %5:_(s64) = G_ANYEXT %4(s1) + $x0 = COPY %5(s64) + RET_ReallyLR +... +--- +name: test_eve_v2p0_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: test_eve_v2p0_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<2 x p0>) + ; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (<2 x s64>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C1]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD]](p0) :: (load (p0)) + ; CHECK-NEXT: $x0 = COPY [[LOAD]](p0) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<2 x p0>) = COPY $q0 + %2:_(s64) = COPY $x0 + %3:_(p0) = G_EXTRACT_VECTOR_ELT %0:_(<2 x p0>), %2:_(s64) + $x0 = COPY %3(p0) + RET_ReallyLR +... +--- name: test_eve_v4s64_unknown_idx body: | bb.0: @@ -237,6 +381,103 @@ body: | RET_ReallyLR ... --- +name: test_eve_v8s32_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $w0 + ; CHECK-LABEL: name: test_eve_v8s32_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %idxprom:_(s64) = G_SEXT %idx(s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[COPY]](<4 x s32>), [[FRAME_INDEX]](p0) :: (store (<4 x s32>) into %stack.0, align 32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[COPY1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into %stack.0 + 16, basealign 32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idxprom, [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %concat:_(<8 x s32>) = G_CONCAT_VECTORS %0(<4 x s32>), %1(<4 x s32>) + %idx:_(s32) = COPY $w0 + %idxprom:_(s64) = G_SEXT %idx(s32) + %3:_(s32) = G_EXTRACT_VECTOR_ELT %concat:_(<8 x s32>), %idxprom:_(s64) + $w0 = COPY %3(s32) + RET_ReallyLR +... +--- +name: test_eve_v16s16_unknown_idx +body: | + bb.0: + liveins: $q0, $q1, $w0 + ; CHECK-LABEL: name: test_eve_v16s16_unknown_idx + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %idxprom:_(s64) = G_SEXT %idx(s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[COPY]](<8 x s16>), [[FRAME_INDEX]](p0) :: (store (<8 x s16>) into %stack.0, align 32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[COPY1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (<8 x s16>) into %stack.0 + 16, basealign 32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idxprom, [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16)) + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s16>) = COPY $q1 + %concat:_(<16 x s16>) = G_CONCAT_VECTORS %0(<8 x s16>), %1(<8 x s16>) + %idx:_(s32) = COPY $w0 + %idxprom:_(s64) = G_SEXT %idx(s32) + %3:_(s16) = G_EXTRACT_VECTOR_ELT %concat:_(<16 x s16>), %idxprom:_(s64) + %ext:_(s32) = G_ANYEXT %3 + $w0 = COPY %ext(s32) + RET_ReallyLR +... +--- +name: test_eve_v4p0_unknown_idx +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: test_eve_v4p0_unknown_idx + ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0) + ; CHECK-NEXT: %idx:_(s64) = COPY $x0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR]](<2 x p0>) + ; CHECK-NEXT: G_STORE [[BITCAST]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (<2 x s64>) into %stack.0, align 32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[BUILD_VECTOR1]](<2 x p0>) + ; CHECK-NEXT: G_STORE [[BITCAST1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %stack.0 + 16, basealign 32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) + ; CHECK-NEXT: %eve:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0)) + ; CHECK-NEXT: $x0 = COPY %eve(p0) + ; CHECK-NEXT: RET_ReallyLR + %vec:_(<4 x p0>) = G_IMPLICIT_DEF + %idx:_(s64) = COPY $x0 + %eve:_(p0) = G_EXTRACT_VECTOR_ELT %vec:_(<4 x p0>), %idx:_(s64) + $x0 = COPY %eve(p0) + RET_ReallyLR +... +--- name: cant_legalize_different_address_spaces body: | bb.0: diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll new file mode 100644 index 00000000000000..49ddc6f6147805 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefix=CHECK-SDAG +; RUN: llc < %s -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefix=CHECK-GISEL + +define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v8s8: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: add x8, sp, #8 +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SDAG-NEXT: str d0, [sp, #8] +; CHECK-SDAG-NEXT: umov w9, v0.b[1] +; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #3 +; CHECK-SDAG-NEXT: ld1 { v1.b }[0], [x8] +; CHECK-SDAG-NEXT: umov w8, v0.b[2] +; CHECK-SDAG-NEXT: mov v1.h[1], w9 +; CHECK-SDAG-NEXT: umov w9, v0.b[3] +; CHECK-SDAG-NEXT: mov v1.h[2], w8 +; CHECK-SDAG-NEXT: mov v1.h[3], w9 +; CHECK-SDAG-NEXT: fmov d0, d1 +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v8s8: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: add x8, sp, #8 +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: str d0, [sp, #8] +; CHECK-GISEL-NEXT: and x9, x9, #0x7 +; CHECK-GISEL-NEXT: mov b2, v0.b[1] +; CHECK-GISEL-NEXT: mov b3, v0.b[2] +; CHECK-GISEL-NEXT: lsl x10, x9, #1 +; CHECK-GISEL-NEXT: mov b0, v0.b[3] +; CHECK-GISEL-NEXT: sub x9, x10, x9 +; CHECK-GISEL-NEXT: add x8, x8, x9 +; CHECK-GISEL-NEXT: ldrb w8, [x8] +; CHECK-GISEL-NEXT: fmov s1, w8 +; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GISEL-NEXT: fmov d0, d1 +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <8 x i8> %x, i32 %idx + %tmp2 = insertelement <4 x i8> undef, i8 %tmp, i32 0 + %tmp3 = extractelement <8 x i8> %x, i32 1 + %tmp4 = insertelement <4 x i8> %tmp2, i8 %tmp3, i32 1 + %tmp5 = extractelement <8 x i8> %x, i32 2 + %tmp6 = insertelement <4 x i8> %tmp4, i8 %tmp5, i32 2 + %tmp7 = extractelement <8 x i8> %x, i32 3 + %tmp8 = insertelement <4 x i8> %tmp6, i8 %tmp7, i32 3 + ret <4 x i8> %tmp8 +} + +define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v16s8: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: mov x8, sp +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str q0, [sp] +; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #4 +; CHECK-SDAG-NEXT: ldr b1, [x8] +; CHECK-SDAG-NEXT: mov v1.b[1], v0.b[1] +; CHECK-SDAG-NEXT: mov v1.b[2], v0.b[2] +; CHECK-SDAG-NEXT: mov v1.b[3], v0.b[3] +; CHECK-SDAG-NEXT: mov v1.b[4], v0.b[4] +; CHECK-SDAG-NEXT: mov v1.b[5], v0.b[5] +; CHECK-SDAG-NEXT: mov v1.b[6], v0.b[6] +; CHECK-SDAG-NEXT: mov v1.b[7], v0.b[7] +; CHECK-SDAG-NEXT: fmov d0, d1 +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v16s8: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov b2, v0.b[1] +; CHECK-GISEL-NEXT: mov x8, sp +; CHECK-GISEL-NEXT: and x9, x9, #0xf +; CHECK-GISEL-NEXT: str q0, [sp] +; CHECK-GISEL-NEXT: mov b3, v0.b[2] +; CHECK-GISEL-NEXT: lsl x10, x9, #1 +; CHECK-GISEL-NEXT: sub x9, x10, x9 +; CHECK-GISEL-NEXT: ldr b1, [x8, x9] +; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0] +; CHECK-GISEL-NEXT: mov b2, v0.b[3] +; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0] +; CHECK-GISEL-NEXT: mov b3, v0.b[4] +; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0] +; CHECK-GISEL-NEXT: mov b2, v0.b[5] +; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0] +; CHECK-GISEL-NEXT: mov b3, v0.b[6] +; CHECK-GISEL-NEXT: mov b0, v0.b[7] +; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0] +; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0] +; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GISEL-NEXT: fmov d0, d1 +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <16 x i8> %x, i32 %idx + %tmp2 = insertelement <8 x i8> undef, i8 %tmp, i32 0 + %tmp3 = extractelement <16 x i8> %x, i32 1 + %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 1 + %tmp5 = extractelement <16 x i8> %x, i32 2 + %tmp6 = insertelement <8 x i8> %tmp4, i8 %tmp5, i32 2 + %tmp7 = extractelement <16 x i8> %x, i32 3 + %tmp8 = insertelement <8 x i8> %tmp6, i8 %tmp7, i32 3 + %tmp9 = extractelement <16 x i8> %x, i32 4 + %tmp10 = insertelement <8 x i8> %tmp8, i8 %tmp9, i32 4 + %tmp11 = extractelement <16 x i8> %x, i32 5 + %tmp12 = insertelement <8 x i8> %tmp10, i8 %tmp11, i32 5 + %tmp13 = extractelement <16 x i8> %x, i32 6 + %tmp14 = insertelement <8 x i8> %tmp12, i8 %tmp13, i32 6 + %tmp15 = extractelement <16 x i8> %x, i32 7 + %tmp16 = insertelement <8 x i8> %tmp14, i8 %tmp15, i32 7 + ret <8 x i8> %tmp16 +} + +define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v2s16: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: add x8, sp, #8 +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str d0, [sp, #8] +; CHECK-SDAG-NEXT: bfi x8, x0, #2, #1 +; CHECK-SDAG-NEXT: ldr w0, [x8] +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v2s16: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: mov s1, v0.s[1] +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: add x8, sp, #12 +; CHECK-GISEL-NEXT: str h0, [sp, #12] +; CHECK-GISEL-NEXT: and x9, x9, #0x1 +; CHECK-GISEL-NEXT: str h1, [sp, #14] +; CHECK-GISEL-NEXT: ldrh w0, [x8, x9, lsl #1] +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <2 x i16> %x, i32 %idx + ret i16 %tmp +} + +define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v4s16: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: add x8, sp, #8 +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SDAG-NEXT: str d0, [sp, #8] +; CHECK-SDAG-NEXT: umov w9, v0.h[1] +; CHECK-SDAG-NEXT: bfi x8, x0, #1, #2 +; CHECK-SDAG-NEXT: ld1 { v0.h }[0], [x8] +; CHECK-SDAG-NEXT: mov v0.s[1], w9 +; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v4s16: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov w8, #2 // =0x2 +; CHECK-GISEL-NEXT: add x10, sp, #8 +; CHECK-GISEL-NEXT: and x9, x9, #0x3 +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: str d0, [sp, #8] +; CHECK-GISEL-NEXT: madd x8, x9, x8, x10 +; CHECK-GISEL-NEXT: umov w9, v0.h[1] +; CHECK-GISEL-NEXT: fmov s1, w9 +; CHECK-GISEL-NEXT: ldr h0, [x8] +; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <4 x i16> %x, i32 %idx + %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 0 + %tmp3 = extractelement <4 x i16> %x, i32 1 + %tmp4 = insertelement <2 x i16> %tmp2, i16 %tmp3, i32 1 + ret <2 x i16> %tmp4 +} + +define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v8s16: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: mov x8, sp +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str q0, [sp] +; CHECK-SDAG-NEXT: bfi x8, x0, #1, #3 +; CHECK-SDAG-NEXT: ldr h1, [x8] +; CHECK-SDAG-NEXT: mov v1.h[1], v0.h[1] +; CHECK-SDAG-NEXT: mov v1.h[2], v0.h[2] +; CHECK-SDAG-NEXT: mov v1.h[3], v0.h[3] +; CHECK-SDAG-NEXT: fmov d0, d1 +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v8s16: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov h2, v0.h[1] +; CHECK-GISEL-NEXT: mov x8, sp +; CHECK-GISEL-NEXT: str q0, [sp] +; CHECK-GISEL-NEXT: and x9, x9, #0x7 +; CHECK-GISEL-NEXT: mov h3, v0.h[2] +; CHECK-GISEL-NEXT: ldr h1, [x8, x9, lsl #1] +; CHECK-GISEL-NEXT: mov h0, v0.h[3] +; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GISEL-NEXT: fmov d0, d1 +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <8 x i16> %x, i32 %idx + %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0 + %tmp3 = extractelement <8 x i16> %x, i32 1 + %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1 + %tmp5 = extractelement <8 x i16> %x, i32 2 + %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2 + %tmp7 = extractelement <8 x i16> %x, i32 3 + %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3 + ret <4 x i16> %tmp8 +} + +define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v2s32: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: add x8, sp, #8 +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str d0, [sp, #8] +; CHECK-SDAG-NEXT: bfi x8, x0, #2, #1 +; CHECK-SDAG-NEXT: ldr w0, [x8] +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v2s32: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: add x8, sp, #8 +; CHECK-GISEL-NEXT: str d0, [sp, #8] +; CHECK-GISEL-NEXT: and x9, x9, #0x1 +; CHECK-GISEL-NEXT: ldr w0, [x8, x9, lsl #2] +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <2 x i32> %x, i32 %idx + ret i32 %tmp +} + +define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v4s32: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: mov x8, sp +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str q0, [sp] +; CHECK-SDAG-NEXT: bfi x8, x0, #2, #2 +; CHECK-SDAG-NEXT: ldr s1, [x8] +; CHECK-SDAG-NEXT: mov v1.s[1], v0.s[1] +; CHECK-SDAG-NEXT: fmov d0, d1 +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v4s32: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov s1, v0.s[1] +; CHECK-GISEL-NEXT: mov x8, sp +; CHECK-GISEL-NEXT: str q0, [sp] +; CHECK-GISEL-NEXT: and x9, x9, #0x3 +; CHECK-GISEL-NEXT: ldr s0, [x8, x9, lsl #2] +; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <4 x i32> %x, i32 %idx + %tmp2 = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %tmp3 = extractelement <4 x i32> %x, i32 1 + %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 + ret <2 x i32> %tmp4 +} + +define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v2s64: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: mov x8, sp +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str q0, [sp] +; CHECK-SDAG-NEXT: bfi x8, x0, #3, #1 +; CHECK-SDAG-NEXT: ldr x0, [x8] +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v2s64: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov x8, sp +; CHECK-GISEL-NEXT: str q0, [sp] +; CHECK-GISEL-NEXT: and x9, x9, #0x1 +; CHECK-GISEL-NEXT: ldr x0, [x8, x9, lsl #3] +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <2 x i64> %x, i32 %idx + ret i64 %tmp +} + +define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) { +; CHECK-SDAG-LABEL: test_varidx_extract_v2p0: +; CHECK-SDAG: // %bb.0: +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SDAG-NEXT: mov x8, sp +; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SDAG-NEXT: str q0, [sp] +; CHECK-SDAG-NEXT: bfi x8, x0, #3, #1 +; CHECK-SDAG-NEXT: ldr x0, [x8] +; CHECK-SDAG-NEXT: add sp, sp, #16 +; CHECK-SDAG-NEXT: ret +; +; CHECK-GISEL-LABEL: test_varidx_extract_v2p0: +; CHECK-GISEL: // %bb.0: +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov x8, sp +; CHECK-GISEL-NEXT: str q0, [sp] +; CHECK-GISEL-NEXT: and x9, x9, #0x1 +; CHECK-GISEL-NEXT: ldr x0, [x8, x9, lsl #3] +; CHECK-GISEL-NEXT: add sp, sp, #16 +; CHECK-GISEL-NEXT: ret + %tmp = extractelement <2 x ptr> %x, i32 %idx + ret ptr %tmp +}