Skip to content

Commit

Permalink
[SLP]Relax assertion in mask combine for non-power-of-2 number of ele…
Browse files Browse the repository at this point in the history
…ments

The nodes may contain a non-power-of-2 number of elements. The assertion
needs to be relaxed to avoid a possible compiler crash.

Fixes llvm#117517
  • Loading branch information
alexey-bataev committed Nov 25, 2024
1 parent 7e3187e commit 57bbdbd
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 3 deletions.
4 changes: 1 addition & 3 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1143,9 +1143,7 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
assert(
(!ExtendingManyInputs || SubMask.size() > Mask.size() ||
// Check if input scalars were extended to match the size of other node.
(SubMask.size() == Mask.size() &&
std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
[](int Idx) { return Idx == PoisonMaskElem; }))) &&
(SubMask.size() == Mask.size() && Mask.back() == PoisonMaskElem)) &&
"SubMask with many inputs support must be larger than the mask.");
if (Mask.empty()) {
Mask.append(SubMask.begin(), SubMask.end());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s

; Regression test for an SLP vectorizer crash (the accompanying commit message
; cites llvm#117517 and a too-strict assertion in mask combining).
;
; The function OR-reduces sixteen i64 values, each produced by a zext from i32.
; Fifteen of them are zero-extensions of `xor i32 0, 0` values and one is the
; zero-extension of %or54.i.i.6.
;
; The expected output below builds a <16 x i32> vector from an <8 x i32>
; zeroinitializer subvector plus %or54.i.i.6 inserted at lane 8, widens it to
; sixteen lanes with a shufflevector that reads source lanes 0..8, zero-extends
; the result to <16 x i64>, and folds it with one llvm.vector.reduce.or call.
; Nine distinct source lanes is not a power of two, which is presumably the
; shape that used to trip the assertion.
define i64 @test() {
; CHECK-LABEL: define i64 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 7, i32 7, i32 8>
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
; CHECK-NEXT: ret i64 [[TMP4]]
;
entry:
; Groups 0 through 5 share one shape: a single `xor i32 0, 0` is zero-extended
; twice and both copies are OR-ed into the running i64 accumulator.
%xor148.2.i = xor i32 0, 0
%conv193.i = zext i32 %xor148.2.i to i64
%conv193.1.i = zext i32 %xor148.2.i to i64
%or194.1.i = or i64 %conv193.i, %conv193.1.i
%xor148.2.i.1 = xor i32 0, 0
%conv193.i.1 = zext i32 %xor148.2.i.1 to i64
%or194.i.1 = or i64 %or194.1.i, %conv193.i.1
%conv193.1.i.1 = zext i32 %xor148.2.i.1 to i64
%or194.1.i.1 = or i64 %or194.i.1, %conv193.1.i.1
%xor148.2.i.2 = xor i32 0, 0
%conv193.i.2 = zext i32 %xor148.2.i.2 to i64
%or194.i.2 = or i64 %or194.1.i.1, %conv193.i.2
%conv193.1.i.2 = zext i32 %xor148.2.i.2 to i64
%or194.1.i.2 = or i64 %or194.i.2, %conv193.1.i.2
%xor148.2.i.3 = xor i32 0, 0
%conv193.i.3 = zext i32 %xor148.2.i.3 to i64
%or194.i.3 = or i64 %or194.1.i.2, %conv193.i.3
%conv193.1.i.3 = zext i32 %xor148.2.i.3 to i64
%or194.1.i.3 = or i64 %or194.i.3, %conv193.1.i.3
%xor148.2.i.4 = xor i32 0, 0
%conv193.i.4 = zext i32 %xor148.2.i.4 to i64
%or194.i.4 = or i64 %or194.1.i.3, %conv193.i.4
%conv193.1.i.4 = zext i32 %xor148.2.i.4 to i64
%or194.1.i.4 = or i64 %or194.i.4, %conv193.1.i.4
%xor148.2.i.5 = xor i32 0, 0
%conv193.i.5 = zext i32 %xor148.2.i.5 to i64
%or194.i.5 = or i64 %or194.1.i.4, %conv193.i.5
%conv193.1.i.5 = zext i32 %xor148.2.i.5 to i64
%or194.1.i.5 = or i64 %or194.i.5, %conv193.1.i.5
; Group 6 breaks the pattern: the second zext operand is %or54.i.i.6 (an
; `or` of the xor with 0) rather than the xor itself, and it is not folded into
; the accumulator until the very last `or` of the function.
%xor148.2.i.6 = xor i32 0, 0
%conv193.i.6 = zext i32 %xor148.2.i.6 to i64
%or194.i.6 = or i64 %or194.1.i.5, %conv193.i.6
%or54.i.i.6 = or i32 %xor148.2.i.6, 0
%conv193.1.i.6 = zext i32 %or54.i.i.6 to i64
%xor148.2.i.7 = xor i32 0, 0
%conv193.i.7 = zext i32 %xor148.2.i.7 to i64
%0 = or i64 %or194.i.6, %conv193.i.7
%conv193.1.i.7 = zext i32 %xor148.2.i.7 to i64
%1 = or i64 %0, %conv193.1.i.7
; Final reduction step: merges the deferred group-6 operand and returns.
%or194.1.i.7 = or i64 %1, %conv193.1.i.6
ret i64 %or194.1.i.7
}

0 comments on commit 57bbdbd

Please sign in to comment.