diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d033b7c2ef4a92..73218f377a0656 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1143,9 +1143,7 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
   assert(
       (!ExtendingManyInputs || SubMask.size() > Mask.size() ||
        // Check if input scalars were extended to match the size of other node.
-       (SubMask.size() == Mask.size() &&
-        std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
-                    [](int Idx) { return Idx == PoisonMaskElem; }))) &&
+       (SubMask.size() == Mask.size() && Mask.back() == PoisonMaskElem)) &&
       "SubMask with many inputs support must be larger than the mask.");
   if (Mask.empty()) {
     Mask.append(SubMask.begin(), SubMask.end());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
new file mode 100644
index 00000000000000..4ad02d47fb3858
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
+
+define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OR54_I_I_6:%.*]] = or i32 0, 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+entry:
+  %xor148.2.i = xor i32 0, 0
+  %conv193.i = zext i32 %xor148.2.i to i64
+  %conv193.1.i = zext i32 %xor148.2.i to i64
+  %or194.1.i = or i64 %conv193.i, %conv193.1.i
+  %xor148.2.i.1 = xor i32 0, 0
+  %conv193.i.1 = zext i32 %xor148.2.i.1 to i64
+  %or194.i.1 = or i64 %or194.1.i, %conv193.i.1
+  %conv193.1.i.1 = zext i32 %xor148.2.i.1 to i64
+  %or194.1.i.1 = or i64 %or194.i.1, %conv193.1.i.1
+  %xor148.2.i.2 = xor i32 0, 0
+  %conv193.i.2 = zext i32 %xor148.2.i.2 to i64
+  %or194.i.2 = or i64 %or194.1.i.1, %conv193.i.2
+  %conv193.1.i.2 = zext i32 %xor148.2.i.2 to i64
+  %or194.1.i.2 = or i64 %or194.i.2, %conv193.1.i.2
+  %xor148.2.i.3 = xor i32 0, 0
+  %conv193.i.3 = zext i32 %xor148.2.i.3 to i64
+  %or194.i.3 = or i64 %or194.1.i.2, %conv193.i.3
+  %conv193.1.i.3 = zext i32 %xor148.2.i.3 to i64
+  %or194.1.i.3 = or i64 %or194.i.3, %conv193.1.i.3
+  %xor148.2.i.4 = xor i32 0, 0
+  %conv193.i.4 = zext i32 %xor148.2.i.4 to i64
+  %or194.i.4 = or i64 %or194.1.i.3, %conv193.i.4
+  %conv193.1.i.4 = zext i32 %xor148.2.i.4 to i64
+  %or194.1.i.4 = or i64 %or194.i.4, %conv193.1.i.4
+  %xor148.2.i.5 = xor i32 0, 0
+  %conv193.i.5 = zext i32 %xor148.2.i.5 to i64
+  %or194.i.5 = or i64 %or194.1.i.4, %conv193.i.5
+  %conv193.1.i.5 = zext i32 %xor148.2.i.5 to i64
+  %or194.1.i.5 = or i64 %or194.i.5, %conv193.1.i.5
+  %xor148.2.i.6 = xor i32 0, 0
+  %conv193.i.6 = zext i32 %xor148.2.i.6 to i64
+  %or194.i.6 = or i64 %or194.1.i.5, %conv193.i.6
+  %or54.i.i.6 = or i32 %xor148.2.i.6, 0
+  %conv193.1.i.6 = zext i32 %or54.i.i.6 to i64
+  %xor148.2.i.7 = xor i32 0, 0
+  %conv193.i.7 = zext i32 %xor148.2.i.7 to i64
+  %0 = or i64 %or194.i.6, %conv193.i.7
+  %conv193.1.i.7 = zext i32 %xor148.2.i.7 to i64
+  %1 = or i64 %0, %conv193.1.i.7
+  %or194.1.i.7 = or i64 %1, %conv193.1.i.6
+  ret i64 %or194.1.i.7
+}