From c2e7c9cb33acbd118fe5011a1607d6cf8e21de34 Mon Sep 17 00:00:00 2001 From: Peter Rong Date: Tue, 30 Aug 2022 14:55:59 -0700 Subject: [PATCH] [CodeGen] Using ZExt for extractelement indices. In https://github.com/llvm/llvm-project/issues/57452, we found that IRTranslator translates `i1 true` into `i32 -1`. This is because IRTranslator uses SExt for extractelement indices. This fix changes the expected behavior of extractelement's index from SExt to ZExt. The change covers the documentation, SelectionDAG, and IRTranslator. We also added a test for AMDGPU and updated tests for AArch64, Mips, PowerPC, RISCV, VE, WebAssembly, and X86. This patch fixes issue #57452. Differential Revision: https://reviews.llvm.org/D132978 --- llvm/docs/LangRef.rst | 5 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 4 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 4 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 23 +- .../AArch64/arm64-indexed-vector-ldst.ll | 6 +- .../CodeGen/AArch64/sve-extract-element.ll | 33 +-- .../CodeGen/AArch64/sve-insert-element.ll | 21 +- .../CodeGen/AArch64/sve-split-extract-elt.ll | 15 +- .../GlobalISel/irtranslator-zext-vec-index.ll | 28 +++ .../test/CodeGen/Mips/msa/basic_operations.ll | 24 +-- .../Mips/msa/basic_operations_float.ll | 12 +- .../PowerPC/aix-p9-xxinsertw-xxextractuw.ll | 1 - .../CodeGen/PowerPC/aix-vec_extract_p9.ll | 6 + .../CodeGen/PowerPC/aix-vec_extract_p9_2.ll | 6 + .../CodeGen/PowerPC/aix-vec_insert_elt.ll | 78 +++---- .../PowerPC/p8-scalar_vector_conversions.ll | 146 ++++++++----- .../PowerPC/variable_elem_vec_extracts.ll | 14 +- llvm/test/CodeGen/PowerPC/vec_extract_p9.ll | 36 ++-- llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll | 36 ++-- llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 202 +++++++----------- llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 54 ++--- .../CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 68 +++--- .../RISCV/rvv/fixed-vectors-extract.ll | 32 +-- .../RISCV/rvv/fixed-vectors-insert-i1.ll | 170 +++++---------- .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 30 +-- .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 48 +++-- llvm/test/CodeGen/VE/Vector/extract_elt.ll | 14 +- llvm/test/CodeGen/VE/Vector/insert_elt.ll | 12 +- .../CodeGen/WebAssembly/simd-build-vector.ll | 143 +++++++++++-- llvm/test/CodeGen/X86/extract-insert.ll | 1 + .../CodeGen/X86/insertelement-var-index.ll | 102 ++++++--- llvm/test/CodeGen/X86/var-permute-128.ll | 2 +- llvm/test/CodeGen/X86/var-permute-512.ll | 24 +-- llvm/test/CodeGen/X86/vec_extract.ll | 4 + 35 files changed, 786 insertions(+), 630 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8092a8eb2d9eee..f66d913e44bf5b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -9699,7 +9699,7 @@ Arguments: The first operand of an '``extractelement``' instruction is a value of :ref:`vector <t_vector>` type. The second operand is an index indicating the position from which to extract the element. The index may be a -variable of any integer type. +variable of any integer type, and will be treated as an unsigned integer. Semantics: """""""""" @@ -9744,7 +9744,8 @@ The first operand of an '``insertelement``' instruction is a value of :ref:`vector <t_vector>` type. The second operand is a scalar value whose type must equal the element type of the first operand. The third operand is an index indicating the position at which to insert the value.
The -index may be a variable of any integer type. +index may be a variable of any integer type, and will be treated as an +unsigned integer. Semantics: """""""""" diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index c6871724da8d21..69fb5bce632e84 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2813,7 +2813,7 @@ bool IRTranslator::translateExtractElement(const User &U, Register Idx; if (auto *CI = dyn_cast(U.getOperand(1))) { if (CI->getBitWidth() != PreferredVecIdxWidth) { - APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth); + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); Idx = getOrCreateVReg(*NewIdxCI); } @@ -2822,7 +2822,7 @@ bool IRTranslator::translateExtractElement(const User &U, Idx = getOrCreateVReg(*U.getOperand(1)); if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); - Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); } MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index cdac20af5bd8ad..02403d6bf29da4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3582,7 +3582,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3592,7 +3592,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 462ff6b12569be..24ae9b7de2e28f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1549,7 +1549,7 @@ define i32 @test_extractelement(<2 x i32> %vec, i32 %idx) { ; CHECK-LABEL: name: test_extractelement ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0 -; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]] +; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]] ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64) ; CHECK: $w0 = COPY [[RES]](s32) %res = extractelement <2 x i32> %vec, i32 %idx @@ -1566,6 +1566,27 @@ define i32 @test_extractelement_const_idx(<2 x i32> %vec) { ret i32 %res 
} +define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i1 true + ret i32 %res +} + +define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) { +; CHECK-LABEL: name: test_extractelement +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64) +; CHECK: $w0 = COPY [[RES]](s32) + %res = extractelement <2 x i32> %vec, i8 255 + ret i32 %res +} + + define i32 @test_singleelementvector(i32 %elt){ ; CHECK-LABEL: name: test_singleelementvector ; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 800d1584e9b473..04940fbe942355 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -9143,8 +9143,7 @@ define i32 @load_single_extract_variable_index_i32(<4 x i32>* %A, i32 %idx) { define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -9158,8 +9157,7 @@ define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) { ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll index 6e3da13ae77ba1..6ca9934a21cafa 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -292,8 +292,7 @@ define double @test_lane2_2xf64( %a) #0 { define i8 @test_lanex_16xi8( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.b, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.b ; CHECK-NEXT: ret @@ -304,8 +303,7 @@ define i8 @test_lanex_16xi8( %a, i32 %x) #0 { define i16 @test_lanex_8xi16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.h ; CHECK-NEXT: ret @@ -316,8 +314,7 @@ define i16 @test_lanex_8xi16( %a, i32 %x) #0 { define i32 @test_lanex_4xi32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w0, p0, z0.s ; CHECK-NEXT: ret @@ -328,8 +325,7 @@ define i32 
@test_lanex_4xi32( %a, i32 %x) #0 { define i64 @test_lanex_2xi64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: ret @@ -340,8 +336,7 @@ define i64 @test_lanex_2xi64( %a, i32 %x) #0 { define half @test_lanex_8xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.h, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -352,8 +347,7 @@ define half @test_lanex_8xf16( %a, i32 %x) #0 { define half @test_lanex_4xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -364,8 +358,7 @@ define half @test_lanex_4xf16( %a, i32 %x) #0 { define half @test_lanex_2xf16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret @@ -376,8 +369,7 @@ define half @test_lanex_2xf16( %a, i32 %x) #0 { define float @test_lanex_4xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -388,8 +380,7 @@ define float @test_lanex_4xf32( %a, i32 %x) #0 { define float @test_lanex_2xf32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret @@ -400,8 +391,7 @@ define float @test_lanex_2xf32( %a, i32 %x) #0 { define double @test_lanex_2xf64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xf64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb d0, p0, z0.d ; CHECK-NEXT: ret @@ -518,8 +508,7 @@ define i1 @test_last_8xi1( %a) #0 { define i1 @test_lanex_4xi1( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: whilels p0.s, xzr, x8 ; CHECK-NEXT: lastb w8, p0, z0.s diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index 49951be72a3ca5..b67184eb67ec61 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -128,8 +128,7 @@ define @test_lane1_16xi8( %a) { define @test_lanex_16xi8( %a, i32 %x) { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b @@ -389,8 +388,7 @@ define @test_predicate_insert_4xi1_immediate ( @test_predicate_insert_8xi1_immediate ( 
%val, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h @@ -427,8 +425,7 @@ define @test_predicate_insert_16xi1_immediate ( @test_predicate_insert_2xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_2xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -446,8 +443,7 @@ define @test_predicate_insert_2xi1( %val, i1 define @test_predicate_insert_4xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, w8 @@ -463,8 +459,7 @@ define @test_predicate_insert_4xi1( %val, i1 define @test_predicate_insert_8xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, w8 @@ -481,8 +476,7 @@ define @test_predicate_insert_8xi1( %val, i1 define @test_predicate_insert_16xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_16xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w8, w1 ; CHECK-NEXT: index z1.b, #0, #1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z0.b, w8 @@ -505,8 +499,7 @@ define @test_predicate_insert_32xi1( %val, ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll index 1b401ee3da8a03..2ebad78b9bbd8e 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -6,8 +6,7 @@ define i32 @promote_extract_2i32_idx( %a, i32 %idx) { ; CHECK-LABEL: promote_extract_2i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb x0, p0, z0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -25,8 +24,7 @@ define i8 @split_extract_32i8_idx( %a, i32 %idx) { ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] @@ -51,8 +49,7 @@ define i16 @split_extract_16i16_idx( %a, i32 %idx) { ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] @@ -77,8 +74,7 @@ define i32 @split_extract_8i32_idx( %a, i32 %idx) { ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x9, x8 @@ -103,8 +99,7 @@ define i64 @split_extract_8i64_idx( %a, i32 %idx) { ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x9, x8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll new file mode 100644 index 00000000000000..5d63ad0c1f17f3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define i8 @f_i1_1() { + ; CHECK-LABEL: name: f_i1_1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i1 true + ret i8 %E1 +} + +define i8 @f_i8_255() { + ; CHECK-LABEL: name: f_i8_255 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %E1 = extractelement <256 x i8> undef, i8 255 + ret i8 %E1 +} diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll index 9ddb91f0770d9d..e55f821392c25c 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1315,7 +1315,7 @@ define i32 @extract_sext_v16i8_vidx() nounwind { ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 24 @@ -1371,7 +1371,7 @@ define i32 @extract_sext_v8i16_vidx() nounwind { ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; 
N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: sra $1, $1, 16 @@ -1423,7 +1423,7 @@ define i32 @extract_sext_v4i32_vidx() nounwind { ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ -1495,7 +1495,7 @@ define i64 @extract_sext_v2i64_vidx() nounwind { ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1546,7 +1546,7 @@ define i32 @extract_zext_v16i8_vidx() nounwind { ; N64-NEXT: ld.b $w0, 0($2) ; N64-NEXT: addv.b $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.b $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1599,7 +1599,7 @@ define i32 @extract_zext_v8i16_vidx() nounwind { ; N64-NEXT: ld.h $w0, 0($2) ; N64-NEXT: addv.h $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.h $w0, $w0[$1] ; N64-NEXT: mfc1 $1, $f0 ; N64-NEXT: jr $ra @@ -1650,7 +1650,7 @@ define i32 @extract_zext_v4i32_vidx() nounwind { ; N64-NEXT: ld.w $w0, 0($2) ; N64-NEXT: addv.w $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.w $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: mfc1 $2, $f0 @@ -1722,7 +1722,7 @@ define i64 @extract_zext_v2i64_vidx() nounwind { ; N64-NEXT: ld.d $w0, 0($2) ; N64-NEXT: addv.d $w0, $w0, $w0 ; N64-NEXT: ld $1, %got_disp(i32)($1) -; N64-NEXT: lw $1, 0($1) +; N64-NEXT: lwu $1, 0($1) ; N64-NEXT: splat.d $w0, $w0[$1] ; N64-NEXT: jr $ra ; N64-NEXT: dmfc1 $2, $f0 @@ -1934,7 +1934,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v16i8)($1) ; N64-NEXT: ld.b $w0, 0($1) ; N64-NEXT: sld.b $w0, $w0[$2] @@ -1994,7 +1994,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v8i16)($1) ; N64-NEXT: ld.h $w0, 0($1) ; N64-NEXT: dsll $2, $2, 1 @@ -2055,7 +2055,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v4i32)($1) ; N64-NEXT: ld.w $w0, 0($1) ; N64-NEXT: dsll $2, $2, 2 @@ -2124,7 +2124,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lw $2, 0($2) +; N64-NEXT: lwu $2, 0($2) ; N64-NEXT: ld $1, %got_disp(v2i64)($1) ; N64-NEXT: ld.d $w0, 0($1) ; N64-NEXT: dsll $2, $2, 3 diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll index 5b87dc8307c45e..1359eb2ecdd6c9 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll +++ 
b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -193,10 +193,9 @@ define float @extract_v4f32_vidx() nounwind { ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <4 x float> %2, i32 %3 - ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]] ret float %4 } @@ -259,10 +258,9 @@ define double @extract_v2f64_vidx() nounwind { ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %4 = extractelement <2 x double> %2, i32 %3 - ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]] + ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]] ret double %4 } @@ -312,11 +310,10 @@ define void @insert_v4f32_vidx(float %a) nounwind { ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <4 x float> %1, float %a, i32 %2 ; float argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.w [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] @@ -341,11 +338,10 @@ define void @insert_v2f64_vidx(double %a) nounwind { ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) %3 = insertelement <2 x double> %1, double %a, i32 %2 ; double argument passed in $f12 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3 + ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3 ; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]] ; ALL-DAG: insve.d [[R1]][0], $w12[0] ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll index c2ff6a6c8ab75b..46ff2280118e13 100644 --- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll +++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll @@ -715,7 +715,6 @@ entry: define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { ; CHECK-64-LABEL: conv2dlbTestuiVar: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: extsw 3, 3 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: mtfprwz 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll index 0d57bd5bdcd3f0..2fdb142776a20f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll @@ -5,6 +5,7 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: clrldi 3, 3, 56 ; CHECK-64-NEXT: blr @@ -24,6 +25,7 @@ entry: define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-64-LABEL: test2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: extsb 3, 3 ; CHECK-64-NEXT: blr @@ -44,6 +46,7 @@ entry: define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: clrldi 3, 3, 48 @@ -64,6 +67,7 @@ entry: define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-64-LABEL: test4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: extsh 3, 3 @@ -84,6 +88,7 @@ entry: define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: blr @@ -103,6 +108,7 @@ entry: define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-64-LABEL: test6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll index eac2330d92f969..f73ac9b9a16e3d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll @@ -5,6 +5,7 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { ; CHECK-64-LABEL: test_add1: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: clrldi 3, 3, 56 @@ -31,6 +32,7 @@ entry: define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-64-LABEL: test_add2: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: vextublx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 ; CHECK-64-NEXT: extsb 3, 3 @@ -57,6 +59,7 @@ entry: define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-64-LABEL: test_add3: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -84,6 +87,7 @@ entry: define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { ; CHECK-64-LABEL: test_add4: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-64-NEXT: vextuhlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -111,6 +115,7 @@ entry: define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-64-LABEL: test_add5: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 @@ -134,6 +139,7 @@ entry: define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-64-LABEL: test_add6: ; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: clrldi 3, 3, 32 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-64-NEXT: vextuwlx 3, 3, 2 ; CHECK-64-NEXT: add 3, 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll index e259b33a266feb..20824e094661bd 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -266,8 +266,8 @@ entry: define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-64-LABEL: testFloat1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29 -; CHECK-64-DAG: addi 4, 1, -16 +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; 
CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfsx 1, 4, 3 ; CHECK-64-NEXT: lxv 34, -16(1) @@ -285,8 +285,7 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-64-P10-LABEL: testFloat1: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: xscvdpspn 35, 1 -; CHECK-64-P10-NEXT: extsw 3, 4 -; CHECK-64-P10-NEXT: slwi 3, 3, 2 +; CHECK-64-P10-NEXT: slwi 3, 4, 2 ; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3 ; CHECK-64-P10-NEXT: blr ; @@ -305,16 +304,16 @@ define <4 x float> @testFloat2(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-64-LABEL: testFloat2: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lwz 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 ; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-64-NEXT: stwx 6, 7, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: lwz 3, 1(3) ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -337,12 +336,10 @@ define <4 x float> @testFloat2(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-64-P10-LABEL: testFloat2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: lwz 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: lwz 3, 1(3) ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -368,8 +365,9 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-64-LABEL: testFloat3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29 -; CHECK-64-DAG: addi 7, 1, -16 +; CHECK-64-NEXT: addi 7, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29 ; CHECK-64-NEXT: lwzx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stwx 6, 7, 4 @@ -377,10 +375,9 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-64-NEXT: lxv 0, -16(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: lwzx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 -; CHECK-64-NEXT: addi 5, 1, -32 +; CHECK-64-NEXT: addi 4, 1, -32 ; CHECK-64-NEXT: stxv 0, -32(1) -; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: stwx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -32(1) ; CHECK-64-NEXT: blr ; @@ -404,14 +401,12 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-64-P10-LABEL: testFloat3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: slwi 4, 4, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: lwzx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: slwi 4, 4, 2 +; CHECK-64-P10-NEXT: slwi 4, 5, 2 ; CHECK-64-P10-NEXT: vinswlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; @@ -572,7 +567,7 @@ entry: define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-64-LABEL: testDouble1: ; CHECK-64: # %bb.0: # %entry -; CHECK-64: rlwinm 3, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28 ; CHECK-64-NEXT: addi 4, 1, -16 ; 
CHECK-64-NEXT: stxv 34, -16(1) ; CHECK-64-NEXT: stfdx 1, 4, 3 @@ -590,7 +585,6 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) ; ; CHECK-64-P10-LABEL: testDouble1: ; CHECK-64-P10: # %bb.0: # %entry -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: mffprd 3, 1 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 @@ -598,8 +592,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) ; ; CHECK-32-P10-LABEL: testDouble1: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-DAG: addi 4, 1, -16 -; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 4, 1, -16 ; CHECK-32-P10-NEXT: stxv 34, -16(1) ; CHECK-32-P10-NEXT: stfdx 1, 4, 3 ; CHECK-32-P10-NEXT: lxv 34, -16(1) @@ -613,17 +607,17 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-64-LABEL: testDouble2: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: ld 6, 0(3) -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 ; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: stdx 6, 7, 4 ; CHECK-64-NEXT: li 4, 1 ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -646,20 +640,18 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-64-P10-LABEL: testDouble2: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: ld 6, 0(3) -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: pld 3, 1(3), 0 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble2: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: lfd 0, 0(3) -; CHECK-32-P10-DAG: addi 6, 1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 @@ -683,8 +675,9 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-64-LABEL: testDouble3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: lis 6, 1 -; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28 -; CHECK-64-DAG: addi 7, 1, -32 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-64-NEXT: ldx 6, 3, 6 ; CHECK-64-NEXT: stxv 34, -32(1) ; CHECK-64-NEXT: stdx 6, 7, 4 @@ -692,10 +685,9 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-64-NEXT: lxv 0, -32(1) ; CHECK-64-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-NEXT: ldx 3, 3, 4 -; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 -; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: addi 4, 1, -16 ; CHECK-64-NEXT: stxv 0, -16(1) -; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: stdx 3, 4, 5 ; CHECK-64-NEXT: lxv 34, -16(1) ; CHECK-64-NEXT: blr ; @@ -719,22 +711,20 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; 
CHECK-64-P10-LABEL: testDouble3: ; CHECK-64-P10: # %bb.0: # %entry ; CHECK-64-P10-NEXT: pld 6, 65536(3), 0 -; CHECK-64-P10-NEXT: extsw 4, 4 ; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6 ; CHECK-64-P10-NEXT: li 4, 1 ; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27 ; CHECK-64-P10-NEXT: ldx 3, 3, 4 -; CHECK-64-P10-NEXT: extsw 4, 5 -; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28 +; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28 ; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3 ; CHECK-64-P10-NEXT: blr ; ; CHECK-32-P10-LABEL: testDouble3: ; CHECK-32-P10: # %bb.0: # %entry ; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0 -; CHECK-32-P10-DAG: addi 6, 1, -32 -; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-P10-NEXT: addi 6, 1, -32 ; CHECK-32-P10-NEXT: stxv 34, -32(1) ; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28 ; CHECK-32-P10-NEXT: stfdx 0, 6, 4 diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 29252e68fdf10c..194807f1d3aa45 100644 --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1112,10 +1112,11 @@ entry: define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { ; CHECK-LABEL: getvelsc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1126,10 +1127,11 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { ; CHECK-LE-LABEL: getvelsc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1139,10 +1141,11 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelsc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1160,10 +1163,11 @@ entry: define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { ; CHECK-LABEL: getveluc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 8 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 7 -; CHECK-NEXT: lvsl v3, 0, r4 -; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: andi. 
r5, r4, 8 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 @@ -1174,10 +1178,11 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { ; CHECK-LE-LABEL: getveluc: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 8 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 7 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1187,10 +1192,11 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { ; ; CHECK-AIX-LABEL: getveluc: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 8 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 7 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: andi. 5, 3, 8 ; CHECK-AIX-NEXT: andc 3, 4, 3 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 @@ -1672,12 +1678,13 @@ entry: define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { ; CHECK-LABEL: getvelss: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1687,11 +1694,12 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { ; CHECK-LE-LABEL: getvelss: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1701,12 +1709,13 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelss: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1723,12 +1732,13 @@ entry: define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { ; CHECK-LABEL: getvelus: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 4 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 3 -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 4 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 1 ; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -1738,11 +1748,12 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { ; CHECK-LE-LABEL: getvelus: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 4 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 1 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 3 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 4 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -1752,12 +1763,13 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelus: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 4 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 3 -; CHECK-AIX-NEXT: sldi 5, 5, 1 +; CHECK-AIX-NEXT: andi. 5, 3, 4 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 4 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -1988,12 +2000,13 @@ entry: define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { ; CHECK-LABEL: getvelsi: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2003,11 +2016,12 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { ; CHECK-LE-LABEL: getvelsi: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2017,12 +2031,13 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelsi: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2038,12 +2053,13 @@ entry: define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { ; CHECK-LABEL: getvelui: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: clrldi r4, r5, 32 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: sldi r4, r4, 2 -; CHECK-NEXT: andc r3, r3, r5 -; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andi. 
r5, r4, 2 +; CHECK-NEXT: andc r3, r3, r4 +; CHECK-NEXT: sldi r5, r5, 2 ; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: lvsl v3, 0, r5 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: mfvsrd r4, v2 ; CHECK-NEXT: srd r3, r4, r3 @@ -2053,11 +2069,12 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { ; CHECK-LE-LABEL: getvelui: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 2 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: and r3, r3, r4 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: sldi r3, r3, 5 ; CHECK-LE-NEXT: mfvsrd r4, v2 @@ -2067,12 +2084,13 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelui: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: andi. 5, 3, 2 +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: li 4, 1 -; CHECK-AIX-NEXT: sldi 5, 5, 2 +; CHECK-AIX-NEXT: andi. 5, 3, 2 ; CHECK-AIX-NEXT: andc 3, 4, 3 -; CHECK-AIX-NEXT: lvsl 3, 0, 5 +; CHECK-AIX-NEXT: sldi 5, 5, 2 ; CHECK-AIX-NEXT: sldi 3, 3, 5 +; CHECK-AIX-NEXT: lvsl 3, 0, 5 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: mfvsrd 4, 34 ; CHECK-AIX-NEXT: srd 3, 4, 3 @@ -2186,7 +2204,8 @@ entry: define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LABEL: getvelsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2196,7 +2215,8 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LE-LABEL: getvelsl: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2205,6 +2225,7 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelsl: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2221,7 +2242,8 @@ entry: define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LABEL: getvelul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2231,7 +2253,8 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LE-LABEL: getvelul: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2240,6 +2263,7 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelul: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 
3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2357,7 +2381,7 @@ entry: define float @getvelf(<4 x float> %vf, i32 signext %i) { ; CHECK-LABEL: getvelf: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: rldic r3, r5, 2, 30 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: xscvspdpn f1, v2 @@ -2365,7 +2389,8 @@ define float @getvelf(<4 x float> %vf, i32 signext %i) { ; ; CHECK-LE-LABEL: getvelf: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xori r3, r5, 3 +; CHECK-LE-NEXT: clrldi r3, r5, 32 +; CHECK-LE-NEXT: xori r3, r3, 3 ; CHECK-LE-NEXT: sldi r3, r3, 2 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2374,7 +2399,7 @@ define float @getvelf(<4 x float> %vf, i32 signext %i) { ; ; CHECK-AIX-LABEL: getvelf: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: sldi 3, 3, 2 +; CHECK-AIX-NEXT: rldic 3, 3, 2, 30 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 ; CHECK-AIX-NEXT: vperm 2, 2, 2, 3 ; CHECK-AIX-NEXT: xscvspdpn 1, 34 @@ -2436,7 +2461,8 @@ entry: define double @getveld(<2 x double> %vd, i32 signext %i) { ; CHECK-LABEL: getveld: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: clrldi r3, r5, 32 +; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2447,7 +2473,8 @@ define double @getveld(<2 x double> %vd, i32 signext %i) { ; CHECK-LE-LABEL: getveld: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: clrldi r4, r5, 32 +; CHECK-LE-NEXT: andc r3, r3, r4 ; CHECK-LE-NEXT: sldi r3, r3, 3 ; CHECK-LE-NEXT: lvsl v3, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 @@ -2457,6 +2484,7 @@ define double @getveld(<2 x double> %vd, i32 signext %i) { ; ; CHECK-AIX-LABEL: getveld: ; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 
3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll index 9c685eded161d4..3aa84a00070f48 100644 --- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll +++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll @@ -6,7 +6,7 @@ ; RUN: --check-prefix=CHECK-P7 ; Function Attrs: norecurse nounwind readnone -define signext i32 @geti(<4 x i32> %a, i32 signext %b) { +define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) { ; CHECK-LABEL: geti: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 2 @@ -19,7 +19,7 @@ define signext i32 @geti(<4 x i32> %a, i32 signext %b) { ; CHECK-NEXT: sldi 3, 3, 5 ; CHECK-NEXT: mfvsrd 4, 34 ; CHECK-NEXT: srd 3, 4, 3 -; CHECK-NEXT: extsw 3, 3 +; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: geti: @@ -33,7 +33,7 @@ define signext i32 @geti(<4 x i32> %a, i32 signext %b) { ; CHECK-BE-NEXT: vperm 2, 2, 2, 3 ; CHECK-BE-NEXT: mfvsrd 4, 34 ; CHECK-BE-NEXT: srd 3, 4, 3 -; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: clrldi 3, 3, 32 ; CHECK-BE-NEXT: blr ; ; CHECK-P7-LABEL: geti: @@ -41,7 +41,7 @@ define signext i32 @geti(<4 x i32> %a, i32 signext %b) { ; CHECK-P7-NEXT: addi 3, 1, -16 ; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29 ; CHECK-P7-NEXT: stxvw4x 34, 0, 3 -; CHECK-P7-NEXT: lwax 3, 3, 4 +; CHECK-P7-NEXT: lwzx 3, 3, 4 ; CHECK-P7-NEXT: blr entry: %vecext = extractelement <4 x i32> %a, i32 %b @@ -49,7 +49,7 @@ entry: } ; Function Attrs: norecurse nounwind readnone -define i64 @getl(<2 x i64> %a, i32 signext %b) { +define i64 @getl(<2 x i64> %a, i32 zeroext %b) { ; CHECK-LABEL: getl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 1 @@ -82,7 +82,7 @@ entry: } ; Function Attrs: norecurse nounwind readnone -define float @getf(<4 x float> %a, i32 signext %b) { +define float @getf(<4 x float> %a, i32 zeroext %b) { ; CHECK-LABEL: getf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xori 3, 5, 3 @@ -113,7 +113,7 @@ entry: } ; Function Attrs: norecurse nounwind readnone -define double @getd(<2 x double> %a, i32 signext %b) { +define double @getd(<2 x double> %a, i32 zeroext %b) { ; CHECK-LABEL: getd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li 3, 1 diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll index 127e3c0e3e7c39..3b4fce3f58eea1 100644 --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -5,13 +5,15 @@ define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -23,13 +25,15 @@ entry: define signext i8 @test2(<16 x i8> %a, i32 signext %index) { ; CHECK-LE-LABEL: test2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -41,14 +45,16 @@ entry: define zeroext 
i16 @test3(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: clrldi 3, 3, 48 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 48 ; CHECK-BE-NEXT: blr @@ -61,14 +67,16 @@ entry: define signext i16 @test4(<8 x i16> %a, i32 signext %index) { ; CHECK-LE-LABEL: test4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: extsh 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: extsh 3, 3 ; CHECK-BE-NEXT: blr @@ -81,13 +89,15 @@ entry: define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr @@ -99,14 +109,16 @@ entry: define signext i32 @test6(<4 x i32> %a, i32 signext %index) { ; CHECK-LE-LABEL: test6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: extsw 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: extsw 3, 3 ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll index e12e06c4248c64..2a66150a65e0c7 100644 --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9_2.ll @@ -5,14 +5,16 @@ define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { ; CHECK-LE-LABEL: test_add1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 56 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr @@ -28,14 +30,16 @@ entry: define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { ; CHECK-LE-LABEL: test_add2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsb 3, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test_add2: ; CHECK-BE: # 
%bb.0: # %entry -; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr @@ -51,7 +55,8 @@ entry: define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { ; CHECK-LE-LABEL: test_add3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 48 @@ -59,7 +64,8 @@ define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) ; ; CHECK-BE-LABEL: test_add3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 48 @@ -76,7 +82,8 @@ entry: define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { ; CHECK-LE-LABEL: test_add4: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-LE-NEXT: vextuhrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsh 3, 3 @@ -84,7 +91,8 @@ define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) ; ; CHECK-BE-LABEL: test_add4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30 ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsh 3, 3 @@ -101,7 +109,8 @@ entry: define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { ; CHECK-LE-LABEL: test_add5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: clrldi 3, 3, 32 @@ -109,7 +118,8 @@ define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) ; ; CHECK-BE-LABEL: test_add5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: clrldi 3, 3, 32 @@ -123,7 +133,8 @@ entry: define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { ; CHECK-LE-LABEL: test_add6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: clrldi 3, 5, 32 +; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: add 3, 3, 6 ; CHECK-LE-NEXT: extsw 3, 3 @@ -131,7 +142,8 @@ define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) ; ; CHECK-BE-LABEL: test_add6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: clrldi 3, 5, 32 +; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: add 3, 3, 6 ; CHECK-BE-NEXT: extsw 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index a17114763149ff..99fb4cd2c5a5da 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -353,16 +353,14 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-LABEL: 
testFloat1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpspn v3, f1 -; CHECK-NEXT: extsw r3, r6 -; CHECK-NEXT: slwi r3, r3, 2 +; CHECK-NEXT: slwi r3, r6, 2 ; CHECK-NEXT: vinswvrx v2, r3, v3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn v3, f1 -; CHECK-BE-NEXT: extsw r3, r6 -; CHECK-BE-NEXT: slwi r3, r3, 2 +; CHECK-BE-NEXT: slwi r3, r6, 2 ; CHECK-BE-NEXT: vinswvlx v2, r3, v3 ; CHECK-BE-NEXT: blr ; @@ -392,74 +390,54 @@ define <4 x float> @testFloat2(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-LABEL: testFloat2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lwz r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: lwz r3, 1(r5) -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testFloat2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lwz r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: lwz r3, 1(r5) -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lwz r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 +; CHECK-P9-NEXT: lwz r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, r4 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: lwz r3, 1(r5) +; CHECK-P9-NEXT: lwz r4, 1(r5) ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; -; AIX-P8-64-LABEL: testFloat2: -; AIX-P8-64: # %bb.0: # %entry -; AIX-P8-64-NEXT: lwz r7, 0(r3) -; AIX-P8-64-NEXT: addi r6, r1, -32 -; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 -; AIX-P8-64-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-64-NEXT: stwx r7, r6, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-64-NEXT: lwz r3, 1(r3) -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 -; AIX-P8-64-NEXT: blr -; -; AIX-P8-32-LABEL: testFloat2: -; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: lwz r7, 0(r3) -; AIX-P8-32-NEXT: addi r6, r1, -32 -; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-32-NEXT: stxvw4x v2, 0, r6 -; AIX-P8-32-NEXT: stwx r7, r6, r4 -; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29 -; AIX-P8-32-NEXT: addi r5, r1, -16 -; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6 -; AIX-P8-32-NEXT: lwz r3, 1(r3) -; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 -; AIX-P8-32-NEXT: stwx r3, r5, r4 -; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 -; AIX-P8-32-NEXT: blr +; AIX-P8-LABEL: testFloat2: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lwz r7, 0(r3) +; AIX-P8-NEXT: addi r6, r1, -32 +; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-NEXT: stxvw4x v2, 0, r6 +; AIX-P8-NEXT: stwx r7, r6, r4 +; AIX-P8-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-NEXT: addi r5, r1, -16 +; AIX-P8-NEXT: lxvw4x vs0, 0, r6 +; AIX-P8-NEXT: lwz r3, 1(r3) +; AIX-P8-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-NEXT: stwx r3, 
r5, r4 +; AIX-P8-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-NEXT: blr entry: %add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1 %0 = load float, ptr %b, align 4 @@ -473,13 +451,11 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-LABEL: testFloat3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: plwz r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: slwi r4, r4, 2 +; CHECK-NEXT: slwi r4, r6, 2 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: slwi r4, r7, 2 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: slwi r4, r4, 2 ; CHECK-NEXT: lwzx r3, r5, r3 ; CHECK-NEXT: vinswrx v2, r4, r3 ; CHECK-NEXT: blr @@ -487,42 +463,39 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; CHECK-BE-LABEL: testFloat3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: plwz r3, 65536(r5), 0 -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: slwi r4, r4, 2 +; CHECK-BE-NEXT: slwi r4, r6, 2 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: slwi r4, r7, 2 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: slwi r4, r4, 2 ; CHECK-BE-NEXT: lwzx r3, r5, r3 ; CHECK-BE-NEXT: vinswlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testFloat3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29 -; CHECK-P9-NEXT: addi r6, r1, -16 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29 +; CHECK-P9-NEXT: addi r7, r1, -16 +; CHECK-P9-NEXT: lwzx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -16(r1) -; CHECK-P9-NEXT: stwx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29 +; CHECK-P9-NEXT: stwx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -16(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: lwzx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: lwzx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -32 ; CHECK-P9-NEXT: stxv vs0, -32(r1) -; CHECK-P9-NEXT: stwx r3, r5, r4 +; CHECK-P9-NEXT: stwx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testFloat3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 -; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: lwzx r6, r3, r6 ; AIX-P8-64-NEXT: stxvw4x v2, 0, r7 ; AIX-P8-64-NEXT: stwx r6, r7, r4 @@ -530,10 +503,11 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze ; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7 ; AIX-P8-64-NEXT: rldic r4, r4, 36, 27 ; AIX-P8-64-NEXT: lwzx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 -; AIX-P8-64-NEXT: stwx r3, r4, r5 -; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-64-NEXT: stwx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testFloat3: @@ -750,17 +724,15 @@ entry: define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { ; CHECK-LABEL: testDouble1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: extsw r4, r6 ; CHECK-NEXT: mffprd r3, f1 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble1: ; CHECK-BE: # %bb.0: # 
%entry -; CHECK-BE-NEXT: extsw r4, r6 ; CHECK-BE-NEXT: mffprd r3, f1 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; @@ -799,41 +771,37 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-LABEL: testDouble2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld r3, 0(r5) -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: pld r3, 1(r5), 0 -; CHECK-NEXT: extsw r4, r7 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testDouble2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: ld r3, 0(r5) -; CHECK-BE-NEXT: extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: pld r3, 1(r5), 0 -; CHECK-BE-NEXT: extsw r4, r7 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: ld r3, 0(r5) ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 +; CHECK-P9-NEXT: ld r6, 0(r5) +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; @@ -842,16 +810,16 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; AIX-P8-64-NEXT: ld r7, 0(r3) ; AIX-P8-64-NEXT: addi r6, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r6 ; AIX-P8-64-NEXT: stdx r7, r6, r4 ; AIX-P8-64-NEXT: li r4, 1 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble2: @@ -882,13 +850,11 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-LABEL: testDouble3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pld r3, 65536(r5), 0 -; CHECK-NEXT: extsw r4, r6 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: li r3, 1 -; CHECK-NEXT: extsw r4, r7 +; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-NEXT: rldic r3, r3, 36, 27 -; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-NEXT: ldx r3, r5, r3 ; CHECK-NEXT: vinsdrx v2, r4, r3 ; CHECK-NEXT: blr @@ -896,53 +862,51 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32 ; CHECK-BE-LABEL: testDouble3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: pld r3, 65536(r5), 0 -; CHECK-BE-NEXT: 
extsw r4, r6 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 +; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: extsw r4, r7 +; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28 ; CHECK-BE-NEXT: rldic r3, r3, 36, 27 -; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28 ; CHECK-BE-NEXT: ldx r3, r5, r3 ; CHECK-BE-NEXT: vinsdlx v2, r4, r3 ; CHECK-BE-NEXT: blr ; ; CHECK-P9-LABEL: testDouble3: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lis r3, 1 ; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28 -; CHECK-P9-NEXT: addi r6, r1, -32 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: lis r6, 1 +; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28 +; CHECK-P9-NEXT: addi r7, r1, -32 +; CHECK-P9-NEXT: ldx r6, r5, r6 ; CHECK-P9-NEXT: stxv v2, -32(r1) -; CHECK-P9-NEXT: stdx r3, r6, r4 -; CHECK-P9-NEXT: li r3, 1 -; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28 +; CHECK-P9-NEXT: stdx r6, r7, r4 +; CHECK-P9-NEXT: li r4, 1 ; CHECK-P9-NEXT: lxv vs0, -32(r1) -; CHECK-P9-NEXT: rldic r3, r3, 36, 27 -; CHECK-P9-NEXT: ldx r3, r5, r3 +; CHECK-P9-NEXT: rldic r4, r4, 36, 27 +; CHECK-P9-NEXT: ldx r4, r5, r4 ; CHECK-P9-NEXT: addi r5, r1, -16 ; CHECK-P9-NEXT: stxv vs0, -16(r1) -; CHECK-P9-NEXT: stdx r3, r5, r4 +; CHECK-P9-NEXT: stdx r4, r5, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr ; ; AIX-P8-64-LABEL: testDouble3: ; AIX-P8-64: # %bb.0: # %entry ; AIX-P8-64-NEXT: lis r6, 1 -; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: addi r7, r1, -32 ; AIX-P8-64-NEXT: li r8, 1 -; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 ; AIX-P8-64-NEXT: ldx r6, r3, r6 ; AIX-P8-64-NEXT: stxvd2x v2, 0, r7 ; AIX-P8-64-NEXT: stdx r6, r7, r4 ; AIX-P8-64-NEXT: rldic r4, r8, 36, 27 ; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7 ; AIX-P8-64-NEXT: ldx r3, r3, r4 -; AIX-P8-64-NEXT: addi r4, r1, -16 -; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 -; AIX-P8-64-NEXT: stdx r3, r4, r5 -; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 ; AIX-P8-64-NEXT: blr ; ; AIX-P8-32-LABEL: testDouble3: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll index 739bc4ffdf57d8..cf7910504a0458 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -25,7 +25,7 @@ define half @extractelt_nxv1f16_imm( %v) { ret half %r } -define half @extractelt_nxv1f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv1f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -57,7 +57,7 @@ define half @extractelt_nxv2f16_imm( %v) { ret half %r } -define half @extractelt_nxv2f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv2f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -89,7 +89,7 @@ define half @extractelt_nxv4f16_imm( %v) { ret half %r } -define half @extractelt_nxv4f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv4f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -121,7 +121,7 @@ define half @extractelt_nxv8f16_imm( %v) { ret half %r } -define half @extractelt_nxv8f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv8f16_idx( %v, i32 zeroext %idx) { 
; CHECK-LABEL: extractelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma @@ -153,7 +153,7 @@ define half @extractelt_nxv16f16_imm( %v) { ret half %r } -define half @extractelt_nxv16f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv16f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma @@ -185,7 +185,7 @@ define half @extractelt_nxv32f16_imm( %v) { ret half %r } -define half @extractelt_nxv32f16_idx( %v, i32 signext %idx) { +define half @extractelt_nxv32f16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma @@ -217,7 +217,7 @@ define float @extractelt_nxv1f32_imm( %v) { ret float %r } -define float @extractelt_nxv1f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv1f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -249,7 +249,7 @@ define float @extractelt_nxv2f32_imm( %v) { ret float %r } -define float @extractelt_nxv2f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv2f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -281,7 +281,7 @@ define float @extractelt_nxv4f32_imm( %v) { ret float %r } -define float @extractelt_nxv4f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv4f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -313,7 +313,7 @@ define float @extractelt_nxv8f32_imm( %v) { ret float %r } -define float @extractelt_nxv8f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv8f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma @@ -345,7 +345,7 @@ define float @extractelt_nxv16f32_imm( %v) { ret float %r } -define float @extractelt_nxv16f32_idx( %v, i32 signext %idx) { +define float @extractelt_nxv16f32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma @@ -377,7 +377,7 @@ define double @extractelt_nxv1f64_imm( %v) { ret double %r } -define double @extractelt_nxv1f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv1f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -409,7 +409,7 @@ define double @extractelt_nxv2f64_imm( %v) { ret double %r } -define double @extractelt_nxv2f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv2f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma @@ -441,7 +441,7 @@ define double @extractelt_nxv4f64_imm( %v) { ret double %r } -define double @extractelt_nxv4f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv4f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma @@ -473,7 +473,7 @@ define double @extractelt_nxv8f64_imm( %v) { ret double %r } -define double @extractelt_nxv8f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv8f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma @@ -591,28 +591,6 
@@ define double @extractelt_nxv16f64_0( %v) { } define double @extractelt_nxv16f64_neg1( %v) { -; CHECK-LABEL: extractelt_nxv16f64_neg1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: .cfi_def_cfa s0, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi a0, sp, 64 -; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 3 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: fld fa0, -8(a0) -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: addi sp, sp, 64 -; CHECK-NEXT: ret %r = extractelement %v, i32 -1 ret double %r } @@ -628,7 +606,7 @@ define double @extractelt_nxv16f64_imm( %v) { ret double %r } -define double @extractelt_nxv16f64_idx( %v, i32 signext %idx) { +define double @extractelt_nxv16f64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index 59ff8e6f71c692..1570df6cfd07c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -23,7 +23,7 @@ define signext i8 @extractelt_nxv1i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv1i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv1i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma @@ -55,7 +55,7 @@ define signext i8 @extractelt_nxv2i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv2i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv2i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -87,7 +87,7 @@ define signext i8 @extractelt_nxv4i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv4i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv4i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma @@ -119,7 +119,7 @@ define signext i8 @extractelt_nxv8i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv8i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv8i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma @@ -151,7 +151,7 @@ define signext i8 @extractelt_nxv16i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv16i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv16i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma @@ -183,7 +183,7 @@ define signext i8 @extractelt_nxv32i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv32i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv32i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma @@ -215,7 +215,7 @@ define signext i8 @extractelt_nxv64i8_imm( %v) { ret i8 %r } -define signext i8 @extractelt_nxv64i8_idx( %v, i32 signext %idx) { +define signext i8 @extractelt_nxv64i8_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv64i8_idx: 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma @@ -247,7 +247,7 @@ define signext i16 @extractelt_nxv1i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv1i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv1i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -279,7 +279,7 @@ define signext i16 @extractelt_nxv2i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv2i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv2i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -311,7 +311,7 @@ define signext i16 @extractelt_nxv4i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv4i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv4i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma @@ -343,7 +343,7 @@ define signext i16 @extractelt_nxv8i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv8i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv8i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma @@ -375,7 +375,7 @@ define signext i16 @extractelt_nxv16i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv16i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv16i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma @@ -407,7 +407,7 @@ define signext i16 @extractelt_nxv32i16_imm( %v) { ret i16 %r } -define signext i16 @extractelt_nxv32i16_idx( %v, i32 signext %idx) { +define signext i16 @extractelt_nxv32i16_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv32i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma @@ -439,7 +439,7 @@ define signext i32 @extractelt_nxv1i32_imm( %v) { ret i32 %r } -define signext i32 @extractelt_nxv1i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv1i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -471,7 +471,7 @@ define signext i32 @extractelt_nxv2i32_imm( %v) { ret i32 %r } -define signext i32 @extractelt_nxv2i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv2i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -503,7 +503,7 @@ define signext i32 @extractelt_nxv4i32_imm( %v) { ret i32 %r } -define signext i32 @extractelt_nxv4i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv4i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -535,7 +535,7 @@ define signext i32 @extractelt_nxv8i32_imm( %v) { ret i32 %r } -define signext i32 @extractelt_nxv8i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv8i32_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma @@ -567,7 +567,7 @@ define signext i32 @extractelt_nxv16i32_imm( %v) { ret i32 %r } -define signext i32 @extractelt_nxv16i32_idx( %v, i32 signext %idx) { +define signext i32 @extractelt_nxv16i32_idx( %v, i32 zeroext 
%idx) { ; CHECK-LABEL: extractelt_nxv16i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma @@ -599,7 +599,7 @@ define i64 @extractelt_nxv1i64_imm( %v) { ret i64 %r } -define i64 @extractelt_nxv1i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv1i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv1i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -631,7 +631,7 @@ define i64 @extractelt_nxv2i64_imm( %v) { ret i64 %r } -define i64 @extractelt_nxv2i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv2i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv2i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma @@ -663,7 +663,7 @@ define i64 @extractelt_nxv4i64_imm( %v) { ret i64 %r } -define i64 @extractelt_nxv4i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv4i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv4i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma @@ -695,7 +695,7 @@ define i64 @extractelt_nxv8i64_imm( %v) { ret i64 %r } -define i64 @extractelt_nxv8i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv8i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv8i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma @@ -814,13 +814,21 @@ define i64 @extractelt_nxv16i64_neg1( %v) { ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: addi a0, sp, 64 ; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 3 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: ld a0, -8(a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a3, a0, a1 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vs8r.v v16, (a3) +; CHECK-NEXT: bltu a2, a1, .LBB72_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB72_2: +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ld a0, 0(a0) ; CHECK-NEXT: addi sp, s0, -64 ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret @@ -839,7 +847,7 @@ define i64 @extractelt_nxv16i64_imm( %v) { ret i64 %r } -define i64 @extractelt_nxv16i64_idx( %v, i32 signext %idx) { +define i64 @extractelt_nxv16i64_idx( %v, i32 zeroext %idx) { ; CHECK-LABEL: extractelt_nxv16i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 06672d9ed88ab0..2323f1120883c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -247,7 +247,7 @@ define i64 @extractelt_v3i64(<3 x i64>* %x) nounwind { ret i64 %b } -define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind { +define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -261,7 +261,7 @@ define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind { ret i8 %b } -define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind { +define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -275,7 +275,7 @@ define i16 @extractelt_v8i16_idx(<8 x i16>* 
%x, i32 signext %idx) nounwind { ret i16 %b } -define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind { +define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -291,7 +291,7 @@ define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind { ret i32 %c } -define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v2i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -320,7 +320,7 @@ define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 signext %idx) nounwind { ret i64 %c } -define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind { +define half @extractelt_v8f16_idx(<8 x half>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -336,7 +336,7 @@ define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind { ret half %c } -define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind { +define float @extractelt_v4f32_idx(<4 x float>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -352,7 +352,7 @@ define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind { ret float %c } -define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind { +define double @extractelt_v2f64_idx(<2 x double>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -368,7 +368,7 @@ define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind ret double %c } -define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind { +define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 32 @@ -383,7 +383,7 @@ define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind { ret i8 %b } -define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind { +define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -397,7 +397,7 @@ define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind { ret i16 %b } -define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind { +define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -413,7 +413,7 @@ define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind { ret i32 %c } -define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v4i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -442,7 +442,7 @@ define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 signext %idx) nounwind { ret i64 %c } -define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind { +define half @extractelt_v16f16_idx(<16 x half>* %x, i32 zeroext %idx) nounwind { ; 
CHECK-LABEL: extractelt_v16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -458,7 +458,7 @@ define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind { ret half %c } -define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind { +define float @extractelt_v8f32_idx(<8 x float>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -474,7 +474,7 @@ define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind { ret float %c } -define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind { +define double @extractelt_v4f64_idx(<4 x double>* %x, i32 zeroext %idx) nounwind { ; CHECK-LABEL: extractelt_v4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -491,10 +491,10 @@ define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind } ; This uses a non-power of 2 type so that it isn't an MVT to catch an -; incorrect use of getSimpleValueType_idx(, i32 signext %idx). +; incorrect use of getSimpleValueType_idx(, i32 zeroext %idx). ; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent ; slidedowns and extracts. -define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 signext %idx) nounwind { +define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v3i64_idx: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index 02849c9662b291..8e6ff0de2de0bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -17,34 +17,19 @@ define <1 x i1> @insertelt_v1i1(<1 x i1> %x, i1 %elt) nounwind { } define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <1 x i1> %x, i1 %elt, i32 %idx ret <1 x i1> %y } @@ -67,34 +52,19 @@ define <2 x i1> @insertelt_v2i1(<2 x i1> %x, i1 %elt) nounwind { } define <2 x i1> 
@insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx ret <2 x i1> %y } @@ -117,34 +87,19 @@ define <8 x i1> @insertelt_v8i1(<8 x i1> %x, i1 %elt) nounwind { } define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: addi a0, a1, 1 -; RV32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vand.vi v8, v9, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vx v9, v8, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vand.vi v8, v9, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx ret <8 x i1> %y } @@ -168,36 +123,23 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind { } define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind { -; RV32-LABEL: insertelt_idx_v64i1: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 64 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV32-NEXT: addi a0, a1, 
1 -; RV32-NEXT: vsetvli zero, a0, e8, m4, tu, ma -; RV32-NEXT: vslideup.vx v12, v8, a1 -; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV32-NEXT: vand.vi v8, v12, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_idx_v64i1: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 64 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vmv.v.i v12, 0 -; RV64-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64-NEXT: sext.w a0, a1 -; RV64-NEXT: addi a1, a0, 1 -; RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma -; RV64-NEXT: vslideup.vx v12, v8, a0 -; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; RV64-NEXT: vand.vi v8, v12, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_idx_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: addi a0, a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vslideup.vx v12, v8, a1 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx ret <64 x i1> %y } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 173e6906f5e724..d86535f2d18444 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -110,7 +110,8 @@ define void @insertelt_v32i16(<32 x i16>* %x, i16 %y, i32 %idx) { ; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: vmv.s.x v12, a1 -; RV64-NEXT: sext.w a1, a2 +; RV64-NEXT: slli a1, a2, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -141,7 +142,8 @@ define void @insertelt_v8f32(<8 x float>* %x, float %y, i32 %idx) { ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vfmv.s.f v10, fa0 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e32, m2, tu, ma ; RV64-NEXT: vslideup.vx v8, v10, a1 @@ -190,7 +192,8 @@ define void @insertelt_v8i64(<8 x i64>* %x, i32 %idx) { ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 @@ -239,7 +242,8 @@ define void @insertelt_c6_v8i64(<8 x i64>* %x, i32 %idx) { ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, 6 ; RV64-NEXT: vmv.s.x v12, a2 -; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: addi a2, a1, 1 ; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll index 57e149afea51fa..9184e199da4764 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -26,7 +26,7 @@ define @insertelt_nxv1f16_imm( %v, half % ret %r } -define @insertelt_nxv1f16_idx( %v, half %elt, 
i32 signext %idx) { +define @insertelt_nxv1f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -61,7 +61,7 @@ define @insertelt_nxv2f16_imm( %v, half % ret %r } -define @insertelt_nxv2f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv2f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma @@ -96,7 +96,7 @@ define @insertelt_nxv4f16_imm( %v, half % ret %r } -define @insertelt_nxv4f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv4f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma @@ -131,7 +131,7 @@ define @insertelt_nxv8f16_imm( %v, half % ret %r } -define @insertelt_nxv8f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv8f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma @@ -166,7 +166,7 @@ define @insertelt_nxv16f16_imm( %v, hal ret %r } -define @insertelt_nxv16f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv16f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma @@ -201,7 +201,7 @@ define @insertelt_nxv32f16_imm( %v, hal ret %r } -define @insertelt_nxv32f16_idx( %v, half %elt, i32 signext %idx) { +define @insertelt_nxv32f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32f16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma @@ -236,7 +236,7 @@ define @insertelt_nxv1f32_imm( %v, floa ret %r } -define @insertelt_nxv1f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv1f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma @@ -271,7 +271,7 @@ define @insertelt_nxv2f32_imm( %v, floa ret %r } -define @insertelt_nxv2f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv2f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ -306,7 +306,7 @@ define @insertelt_nxv4f32_imm( %v, floa ret %r } -define @insertelt_nxv4f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv4f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -341,7 +341,7 @@ define @insertelt_nxv8f32_imm( %v, floa ret %r } -define @insertelt_nxv8f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv8f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -376,7 +376,7 @@ define @insertelt_nxv16f32_imm( %v, f ret %r } -define @insertelt_nxv16f32_idx( %v, float %elt, i32 signext %idx) { +define @insertelt_nxv16f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -411,7 +411,7 @@ define @insertelt_nxv1f64_imm( %v, do ret %r } -define @insertelt_nxv1f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv1f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma @@ -446,7 +446,7 @@ define @insertelt_nxv2f64_imm( %v, do ret %r } -define @insertelt_nxv2f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv2f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -481,7 +481,7 @@ define @insertelt_nxv4f64_imm( %v, do ret %r } -define @insertelt_nxv4f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv4f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma @@ -516,7 +516,7 @@ define @insertelt_nxv8f64_imm( %v, do ret %r } -define @insertelt_nxv8f64_idx( %v, double %elt, i32 signext %idx) { +define @insertelt_nxv8f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 0638a07e44bef9..46b8085d7fe77a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -24,7 +24,7 @@ define @insertelt_nxv1i8_imm( %v, i8 signext ret %r } -define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma @@ -59,7 +59,7 @@ define @insertelt_nxv2i8_imm( %v, i8 signext ret %r } -define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma @@ -94,7 +94,7 @@ define @insertelt_nxv4i8_imm( %v, i8 signext ret %r } -define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma @@ -129,7 +129,7 @@ define @insertelt_nxv8i8_imm( %v, i8 signext ret %r } -define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma @@ -164,7 +164,7 @@ define @insertelt_nxv16i8_imm( %v, i8 signe ret %r } -define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma @@ -199,7 +199,7 @@ define @insertelt_nxv32i8_imm( %v, i8 signe ret %r } -define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma @@ -234,7 +234,7 @@ define @insertelt_nxv64i8_imm( %v, i8 signe ret %r } -define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 signext %idx) { +define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv64i8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma @@ -269,7 +269,7 @@ define @insertelt_nxv1i16_imm( %v, i16 sign ret 
%r } -define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma @@ -304,7 +304,7 @@ define @insertelt_nxv2i16_imm( %v, i16 sign ret %r } -define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma @@ -339,7 +339,7 @@ define @insertelt_nxv4i16_imm( %v, i16 sign ret %r } -define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma @@ -374,7 +374,7 @@ define @insertelt_nxv8i16_imm( %v, i16 sign ret %r } -define @insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma @@ -409,7 +409,7 @@ define @insertelt_nxv16i16_imm( %v, i16 s ret %r } -define @insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -444,7 +444,7 @@ define @insertelt_nxv32i16_imm( %v, i16 s ret %r } -define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 signext %idx) { +define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -479,7 +479,7 @@ define @insertelt_nxv1i32_imm( %v, i32 sign ret %r } -define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma @@ -514,7 +514,7 @@ define @insertelt_nxv2i32_imm( %v, i32 sign ret %r } -define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma @@ -549,7 +549,7 @@ define @insertelt_nxv4i32_imm( %v, i32 sign ret %r } -define @insertelt_nxv4i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv4i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma @@ -584,7 +584,7 @@ define @insertelt_nxv8i32_imm( %v, i32 sign ret %r } -define @insertelt_nxv8i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv8i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma @@ -619,7 +619,7 @@ define @insertelt_nxv16i32_imm( %v, i32 s ret %r } -define @insertelt_nxv16i32_idx( %v, i32 signext %elt, i32 signext %idx) { +define @insertelt_nxv16i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -659,7 +659,8 @@ define 
@insertelt_nxv1i64_idx( %v, i64 %elt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 @@ -695,7 +696,8 @@ define @insertelt_nxv2i64_idx( %v, i64 %elt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 @@ -731,7 +733,8 @@ define @insertelt_nxv4i64_idx( %v, i64 %elt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 @@ -767,7 +770,8 @@ define @insertelt_nxv8i64_idx( %v, i64 %elt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: sext.w a0, a1 +; CHECK-NEXT: slli a0, a1, 32 +; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 diff --git a/llvm/test/CodeGen/VE/Vector/extract_elt.ll b/llvm/test/CodeGen/VE/Vector/extract_elt.ll index c9c2228f74b0fc..132abc87ef110d 100644 --- a/llvm/test/CodeGen/VE/Vector/extract_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/extract_elt.ll @@ -6,6 +6,7 @@ define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) { ; CHECK-LABEL: extract_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i64> %v, i32 %idx @@ -45,6 +46,7 @@ define fastcc i64 @extract_ri_v512i64(<512 x i64> %v) { define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) { ; CHECK-LABEL: extract_rr_v256i32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x i32> %v, i32 %idx @@ -84,7 +86,10 @@ define fastcc i32 @extract_ri_v512i32(<512 x i32> %v) { define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512i32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 @@ -100,6 +105,7 @@ define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) { define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) { ; CHECK-LABEL: extract_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x double> %v, i32 %idx @@ -139,6 +145,7 @@ define fastcc double @extract_ri_v512f64(<512 x double> %v) { define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) { ; CHECK-LABEL: extract_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lvs %s0, %v0(%s0) ; CHECK-NEXT: b.l.t (, %s10) %ret = extractelement <256 x float> %v, i32 %idx @@ -179,7 +186,10 @@ define fastcc float 
@extract_ri_v512f32(<512 x float> %v) { define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) { ; CHECK-LABEL: extract_rr_v512f32: ; CHECK: # %bb.0: -; CHECK-NEXT: srl %s1, %s0, 1 +; CHECK-NEXT: lea %s1, -2 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 1 ; CHECK-NEXT: lvs %s1, %v0(%s1) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll index 3004699e26d4e9..e44c58cf1b0655 100644 --- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll +++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll @@ -6,6 +6,7 @@ define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) { ; CHECK-LABEL: insert_rr_v256i64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i64> undef, i64 %s, i32 %idx @@ -46,6 +47,7 @@ define fastcc <256 x i32> @insert_rr_v256i32(i32 signext %idx, i32 signext %s) { ; CHECK-LABEL: insert_rr_v256i32: ; CHECK: # %bb.0: ; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x i32> undef, i32 %s, i32 %idx @@ -94,6 +96,9 @@ define fastcc <512 x i32> @insert_rr_v512i32(i32 signext %idx, i32 signext %s) { ; CHECK-NEXT: nnd %s2, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s2, %s2, 5 ; CHECK-NEXT: sll %s1, %s1, %s2 +; CHECK-NEXT: lea %s3, -2 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: and %s0, %s0, %s3 ; CHECK-NEXT: srl %s0, %s0, 1 ; CHECK-NEXT: lvs %s3, %v0(%s0) ; CHECK-NEXT: srl %s2, (32)1, %s2 @@ -110,6 +115,7 @@ define fastcc <512 x i32> @insert_rr_v512i32(i32 signext %idx, i32 signext %s) { define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) { ; CHECK-LABEL: insert_rr_v256f64: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x double> undef, double %s, i32 %idx @@ -149,6 +155,7 @@ define fastcc <512 x double> @insert_ri_v512f64(double %s) { define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) { ; CHECK-LABEL: insert_rr_v256f32: ; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lsv %v0(%s0), %s1 ; CHECK-NEXT: b.l.t (, %s10) %ret = insertelement <256 x float> undef, float %s, i32 %idx @@ -193,7 +200,10 @@ define fastcc <512 x float> @insert_rr_v512f32(i32 signext %idx, float %s) { ; CHECK-LABEL: insert_rr_v512f32: ; CHECK: # %bb.0: ; CHECK-NEXT: sra.l %s1, %s1, 32 -; CHECK-NEXT: srl %s2, %s0, 1 +; CHECK-NEXT: lea %s2, -2 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: and %s2, %s0, %s2 +; CHECK-NEXT: srl %s2, %s2, 1 ; CHECK-NEXT: lvs %s3, %v0(%s2) ; CHECK-NEXT: nnd %s0, %s0, (63)0 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll index 9be4176ab6beb2..1cc05fcf80f15f 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -97,8 +97,17 @@ define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_one_i8x16: ; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: 
i32.sub $push8=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push7=, $2=, $pop8 +; CHECK-NEXT: v128.store 0($pop7), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop1 +; CHECK-NEXT: i32.or $push3=, $2, $pop2 +; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3) +; CHECK-NEXT: return $pop4 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -109,8 +118,107 @@ define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: global.get $push80=, __stack_pointer +; CHECK-NEXT: i32.const $push81=, 16 +; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81 +; CHECK-NEXT: local.tee $push97=, $2=, $pop98 +; CHECK-NEXT: v128.store 0($pop97), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push62=, $pop61, $pop1 +; CHECK-NEXT: i32.or $push63=, $2, $pop62 +; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63) +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1 +; CHECK-NEXT: i32.const $push96=, 15 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop96 +; CHECK-NEXT: i32.or $push59=, $2, $pop58 +; CHECK-NEXT: i32.load8_u $push60=, 0($pop59) +; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2 +; CHECK-NEXT: i32.const $push95=, 15 +; CHECK-NEXT: i32.and $push54=, $pop53, $pop95 +; CHECK-NEXT: i32.or $push55=, $2, $pop54 +; CHECK-NEXT: i32.load8_u $push56=, 0($pop55) +; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3 +; CHECK-NEXT: i32.const $push94=, 15 +; CHECK-NEXT: i32.and $push50=, $pop49, $pop94 +; CHECK-NEXT: i32.or $push51=, $2, $pop50 +; CHECK-NEXT: i32.load8_u $push52=, 0($pop51) +; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4 +; CHECK-NEXT: i32.const $push93=, 15 +; CHECK-NEXT: i32.and $push46=, $pop45, $pop93 +; CHECK-NEXT: i32.or $push47=, $2, $pop46 +; CHECK-NEXT: i32.load8_u $push48=, 0($pop47) +; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5 +; CHECK-NEXT: i32.const $push92=, 15 +; CHECK-NEXT: i32.and $push42=, $pop41, $pop92 +; CHECK-NEXT: i32.or $push43=, $2, $pop42 +; CHECK-NEXT: i32.load8_u $push44=, 0($pop43) +; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6 +; CHECK-NEXT: i32.const $push91=, 15 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop91 +; CHECK-NEXT: i32.or $push39=, $2, $pop38 +; CHECK-NEXT: i32.load8_u $push40=, 0($pop39) +; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7 +; CHECK-NEXT: i32.const $push90=, 15 +; CHECK-NEXT: i32.and $push34=, $pop33, $pop90 +; CHECK-NEXT: i32.or $push35=, $2, $pop34 +; CHECK-NEXT: i32.load8_u $push36=, 0($pop35) +; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8 +; CHECK-NEXT: i32.const $push89=, 15 +; CHECK-NEXT: i32.and $push30=, $pop29, $pop89 +; CHECK-NEXT: i32.or $push31=, $2, $pop30 +; CHECK-NEXT: i32.load8_u $push32=, 0($pop31) +; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 
8, $pop32 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9 +; CHECK-NEXT: i32.const $push88=, 15 +; CHECK-NEXT: i32.and $push26=, $pop25, $pop88 +; CHECK-NEXT: i32.or $push27=, $2, $pop26 +; CHECK-NEXT: i32.load8_u $push28=, 0($pop27) +; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10 +; CHECK-NEXT: i32.const $push87=, 15 +; CHECK-NEXT: i32.and $push22=, $pop21, $pop87 +; CHECK-NEXT: i32.or $push23=, $2, $pop22 +; CHECK-NEXT: i32.load8_u $push24=, 0($pop23) +; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11 +; CHECK-NEXT: i32.const $push86=, 15 +; CHECK-NEXT: i32.and $push18=, $pop17, $pop86 +; CHECK-NEXT: i32.or $push19=, $2, $pop18 +; CHECK-NEXT: i32.load8_u $push20=, 0($pop19) +; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12 +; CHECK-NEXT: i32.const $push85=, 15 +; CHECK-NEXT: i32.and $push14=, $pop13, $pop85 +; CHECK-NEXT: i32.or $push15=, $2, $pop14 +; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) +; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13 +; CHECK-NEXT: i32.const $push84=, 15 +; CHECK-NEXT: i32.and $push10=, $pop9, $pop84 +; CHECK-NEXT: i32.or $push11=, $2, $pop10 +; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) +; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12 +; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14 +; CHECK-NEXT: i32.const $push83=, 15 +; CHECK-NEXT: i32.and $push6=, $pop5, $pop83 +; CHECK-NEXT: i32.or $push7=, $2, $pop6 +; CHECK-NEXT: i32.load8_u $push8=, 0($pop7) +; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15 +; CHECK-NEXT: i32.const $push82=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop82 +; CHECK-NEXT: i32.or $push3=, $2, $pop2 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4 +; CHECK-NEXT: return $pop79 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -210,14 +318,25 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla ; CHECK-LABEL: mashup_swizzle_i8x16: ; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1 -; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2 -; CHECK-NEXT: i32.const $push2=, 42 -; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2 -; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2 -; CHECK-NEXT: i32.const $push6=, 42 -; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6 -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: global.get $push12=, __stack_pointer +; CHECK-NEXT: i32.const $push13=, 16 +; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13 +; CHECK-NEXT: local.tee $push15=, $3=, $pop16 +; CHECK-NEXT: v128.store 0($pop15), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7 +; CHECK-NEXT: i32.const $push1=, 15 +; CHECK-NEXT: i32.and $push8=, $pop7, $pop1 +; CHECK-NEXT: i32.or $push9=, $3, $pop8 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0 +; CHECK-NEXT: i32.const $push14=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop14 +; CHECK-NEXT: i32.or $push3=, $3, $pop2 +; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 +; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0 +; CHECK-NEXT: 
i8x16.replace_lane $push6=, $pop5, 3, $2 +; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7 +; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2 +; CHECK-NEXT: return $pop11 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll index 23d66b2d77f35f..0728b249eb1f89 100644 --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -5,6 +5,7 @@ define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) { ; CHECK-LABEL: extractelt_undef_insertelt: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret{{[l|q]}} %b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3 %c = icmp uge i32 %y, %y diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 88c5c2730ec9c1..7e366e3b00ebe4 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -996,7 +996,7 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { ; ; AVX512-LABEL: arg_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} @@ -1101,7 +1101,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind ; SSE41: # %bb.0: ; SSE41-NEXT: movapd %xmm0, %xmm2 ; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0] -; SSE41-NEXT: movslq %edi, %rax +; SSE41-NEXT: movl %edi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1112,7 +1112,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind ; AVX1-LABEL: arg_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1122,7 +1122,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind ; AVX2-LABEL: arg_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1131,7 +1131,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind ; ; AVX512-LABEL: arg_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm2 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] @@ -1346,7 +1346,7 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind { ; ; AVX512-LABEL: load_i64_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} @@ -1458,7 +1458,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind { ; SSE41: # %bb.0: ; 
SSE41-NEXT: movapd %xmm0, %xmm1 ; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0] -; SSE41-NEXT: movslq %esi, %rax +; SSE41-NEXT: movl %esi, %eax ; SSE41-NEXT: movq %rax, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -1469,7 +1469,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind { ; AVX1-LABEL: load_f64_v2f64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1479,7 +1479,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind { ; AVX2-LABEL: load_f64_v2f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 @@ -1488,7 +1488,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind { ; ; AVX512-LABEL: load_f64_v2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %xmm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 ; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] @@ -1733,7 +1733,7 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { ; ; AVX512-LABEL: arg_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} @@ -1834,7 +1834,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: movslq %edi, %rax +; AVX1-NEXT: movl %edi, %eax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 @@ -1846,7 +1846,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind ; AVX2-LABEL: arg_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 -; AVX2-NEXT: movslq %edi, %rax +; AVX2-NEXT: movl %edi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -1855,7 +1855,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind ; ; AVX512-LABEL: arg_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %edi, %rax +; AVX512-NEXT: movl %edi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm2 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1 ; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} @@ -2114,7 +2114,7 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind { ; ; AVX512-LABEL: load_i64_v4i64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} @@ -2218,7 +2218,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind { ; ; AVX1-LABEL: load_f64_v4f64: ; AVX1: # %bb.0: -; 
AVX1-NEXT: movslq %esi, %rax +; AVX1-NEXT: movl %esi, %eax ; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 @@ -2231,7 +2231,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind { ; AVX2-LABEL: load_f64_v4f64: ; AVX2: # %bb.0: ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 -; AVX2-NEXT: movslq %esi, %rax +; AVX2-NEXT: movl %esi, %eax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 @@ -2240,7 +2240,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind { ; ; AVX512-LABEL: load_f64_v4f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movslq %esi, %rax +; AVX512-NEXT: movl %esi, %eax ; AVX512-NEXT: vpbroadcastq %rax, %ymm1 ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 ; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} @@ -2273,6 +2273,15 @@ define i32 @PR44139(ptr %p) { ; SSE-LABEL: PR44139: ; SSE: # %bb.0: ; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1] +; SSE-NEXT: movdqa %xmm0, 96(%rdi) +; SSE-NEXT: movdqa %xmm0, 112(%rdi) +; SSE-NEXT: movdqa %xmm0, 64(%rdi) +; SSE-NEXT: movdqa %xmm0, 80(%rdi) +; SSE-NEXT: movdqa %xmm0, 32(%rdi) +; SSE-NEXT: movdqa %xmm0, 48(%rdi) +; SSE-NEXT: movdqa %xmm0, (%rdi) +; SSE-NEXT: movdqa %xmm0, 16(%rdi) ; SSE-NEXT: leal 2147483647(%rax), %ecx ; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: cmovnsl %eax, %ecx @@ -2283,23 +2292,51 @@ define i32 @PR44139(ptr %p) { ; SSE-NEXT: divl %ecx ; SSE-NEXT: retq ; -; AVX-LABEL: PR44139: -; AVX: # %bb.0: -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: leal 2147483647(%rax), %ecx -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: cmovnsl %eax, %ecx -; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: # kill: def $eax killed $eax killed $rax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: retq +; AVX1OR2-LABEL: PR44139: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX1OR2-NEXT: movl (%rdi), %eax +; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi) +; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi) +; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx +; AVX1OR2-NEXT: testl %eax, %eax +; AVX1OR2-NEXT: cmovnsl %eax, %ecx +; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX1OR2-NEXT: addl %eax, %ecx +; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX1OR2-NEXT: xorl %edx, %edx +; AVX1OR2-NEXT: divl %ecx +; AVX1OR2-NEXT: vzeroupper +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: PR44139: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0 +; AVX512-NEXT: movl (%rdi), %eax +; AVX512-NEXT: vmovaps %zmm0, (%rdi) +; AVX512-NEXT: vmovaps %zmm0, 64(%rdi) +; AVX512-NEXT: leal 2147483647(%rax), %ecx +; AVX512-NEXT: testl %eax, %eax +; AVX512-NEXT: cmovnsl %eax, %ecx +; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 +; AVX512-NEXT: addl %eax, %ecx +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: divl %ecx +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X86AVX2-LABEL: PR44139: ; X86AVX2: # %bb.0: -; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86AVX2-NEXT: movl (%eax), %eax +; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86AVX2-NEXT: movl (%ecx), %eax +; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0 +; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx) +; X86AVX2-NEXT: 
vmovaps %ymm0, 96(%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, (%ecx) +; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx) ; X86AVX2-NEXT: leal 2147483647(%eax), %ecx ; X86AVX2-NEXT: testl %eax, %eax ; X86AVX2-NEXT: cmovnsl %eax, %ecx @@ -2307,6 +2344,7 @@ define i32 @PR44139(ptr %p) { ; X86AVX2-NEXT: addl %eax, %ecx ; X86AVX2-NEXT: xorl %edx, %edx ; X86AVX2-NEXT: divl %ecx +; X86AVX2-NEXT: vzeroupper ; X86AVX2-NEXT: retl %L = load <16 x i64>, ptr %p %E1 = extractelement <16 x i64> %L, i64 0 diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index eebb2c6f953685..61e648eec855f3 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -129,7 +129,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind { define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind { ; SSE3-LABEL: var_shuffle_v8i16: ; SSE3: # %bb.0: -; SSE3-NEXT: movd %xmm1, %eax +; SSE3-NEXT: pextrw $0, %xmm1, %eax ; SSE3-NEXT: pextrw $1, %xmm1, %ecx ; SSE3-NEXT: pextrw $2, %xmm1, %edx ; SSE3-NEXT: pextrw $3, %xmm1, %esi diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll index c512448ab7db24..b55fd27d4036c8 100644 --- a/llvm/test/CodeGen/X86/var-permute-512.ll +++ b/llvm/test/CodeGen/X86/var-permute-512.ll @@ -101,7 +101,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrw $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax @@ -127,7 +127,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi ; AVX512F-NEXT: vpextrw $7, %xmm4, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrw $0, %xmm3, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -152,7 +152,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi ; AVX512F-NEXT: vpextrw $7, %xmm3, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrw $0, %xmm2, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -180,7 +180,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrw $0, %xmm1, %eax ; AVX512F-NEXT: andl $31, %eax ; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -330,7 +330,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512F-NEXT: vmovd %xmm4, %eax +; AVX512F-NEXT: vpextrb $0, %xmm4, %eax ; AVX512F-NEXT: vmovaps %zmm0, (%rsp) ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax @@ -380,7 +380,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; 
AVX512F-NEXT: vpextrb $15, %xmm4, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: vpextrb $0, %xmm3, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -432,7 +432,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpextrb $0, %xmm2, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -485,7 +485,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpextrb $0, %xmm1, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -555,7 +555,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512BW-NEXT: vmovd %xmm4, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax ; AVX512BW-NEXT: vmovaps %zmm0, (%rsp) ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax @@ -605,7 +605,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %xmm3, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -657,7 +657,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512BW-NEXT: vmovd %xmm2, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -710,7 +710,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind { ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: vmovd %xmm1, %eax +; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll index 04a3aaec5c495d..dd0a50e6ba984b 100644 --- a/llvm/test/CodeGen/X86/vec_extract.ll +++ b/llvm/test/CodeGen/X86/vec_extract.ll @@ -110,11 +110,15 @@ define <4 x i32> @ossfuzz15662(ptr %in) { ; X32-LABEL: ossfuzz15662: ; X32: # %bb.0: ; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: movaps %xmm0, (%eax) +; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: ossfuzz15662: ; X64: # %bb.0: ; X64-NEXT: xorps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, (%rax) +; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: retq %C10 = icmp ule i1 false, false %C3 = icmp ule i1 true, undef
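The checks in this section all exercise the same lowering pattern: a vector element access whose variable index is narrower than the index type the backend uses, and which is now widened with a zero-extend. That is what the `i32 zeroext %idx` attributes and the masking sequences above verify (`slli`/`srli` by 32 on RISC-V, `and %s0, %s0, (32)0` on VE, `movl` instead of `movslq` on X86); note the VE tests keep `signext` on the parameter yet still mask the high bits, because the element index is consumed as an unsigned value. The IR below is an illustrative sketch of that pattern, not taken from the patch, and the function names are made up.

; Hypothetical examples mirroring the updated tests: the narrow i32 index
; is zero-extended (not sign-extended) to the wider vector-index type
; before the element access.
define <vscale x 2 x i64> @sketch_insertelt_var_idx(<vscale x 2 x i64> %v, i64 %elt, i32 zeroext %idx) {
  %r = insertelement <vscale x 2 x i64> %v, i64 %elt, i32 %idx
  ret <vscale x 2 x i64> %r
}

define i64 @sketch_extractelt_var_idx(<256 x i64> %v, i32 signext %idx) {
  %e = extractelement <256 x i64> %v, i32 %idx
  ret i64 %e
}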