diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c4582df89213d8..894dc68b6146d3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5355,11 +5355,10 @@ static bool clusterSortPtrAccesses(ArrayRef VL, SmallPtrSet SecondPointers; Value *P1 = Ptr1; Value *P2 = Ptr2; - if (P1 == P2) - return false; unsigned Depth = 0; - while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1) && - Depth <= RecursionMaxDepth) { + while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1)) { + if (P1 == P2 || Depth > RecursionMaxDepth) + return false; FirstPointers.insert(P1); SecondPointers.insert(P2); P1 = getUnderlyingObject(P1, /*MaxLookup=*/1); diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll new file mode 100644 index 00000000000000..cf1ed54149b8b5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s + +define i64 @test(ptr %arg, i32 %arg1, i64 %i) { +; CHECK-LABEL: define i64 @test( +; CHECK-SAME: ptr [[ARG:%.*]], i32 [[ARG1:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[I]] +; CHECK-NEXT: [[I3:%.*]] = getelementptr i8, ptr [[I2]], i64 [[I]] +; CHECK-NEXT: [[I4:%.*]] = getelementptr i8, ptr [[I3]], i64 [[I]] +; CHECK-NEXT: [[I5:%.*]] = getelementptr i8, ptr [[I4]], i64 [[I]] +; CHECK-NEXT: [[I6:%.*]] = getelementptr i8, ptr [[I5]], i64 [[I]] +; CHECK-NEXT: [[I7:%.*]] = getelementptr i8, ptr [[I6]], i64 [[I]] +; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr [[I7]], i64 [[I]] +; CHECK-NEXT: [[I9:%.*]] = getelementptr i8, ptr [[I8]], i64 [[I]] +; CHECK-NEXT: [[I10:%.*]] = getelementptr i8, ptr [[I9]], i64 [[I]] +; CHECK-NEXT: [[I11:%.*]] = getelementptr i8, ptr [[I10]], i64 [[I]] +; CHECK-NEXT: [[I12:%.*]] = getelementptr i8, ptr [[I11]], i64 [[I]] +; CHECK-NEXT: [[I13:%.*]] = getelementptr i8, ptr [[I12]], i64 [[I]] +; CHECK-NEXT: [[I14:%.*]] = getelementptr i8, ptr [[I13]], i64 [[I]] +; CHECK-NEXT: [[I140:%.*]] = load i8, ptr [[I14]], align 1 +; CHECK-NEXT: [[I1412:%.*]] = zext i8 [[I140]] to i32 +; CHECK-NEXT: [[I142:%.*]] = mul i32 [[ARG1]], [[I1412]] +; CHECK-NEXT: [[I143:%.*]] = getelementptr i8, ptr [[I13]], i64 15 +; CHECK-NEXT: [[I144:%.*]] = load i8, ptr [[I143]], align 1 +; CHECK-NEXT: [[I1453:%.*]] = zext i8 [[I144]] to i32 +; CHECK-NEXT: [[I146:%.*]] = mul i32 [[ARG1]], [[I1453]] +; CHECK-NEXT: [[I147:%.*]] = getelementptr i8, ptr [[I13]], i64 14 +; CHECK-NEXT: [[I148:%.*]] = load i8, ptr [[I147]], align 1 +; CHECK-NEXT: [[I1494:%.*]] = zext i8 [[I148]] to i32 +; CHECK-NEXT: [[I150:%.*]] = mul i32 [[ARG1]], [[I1494]] +; CHECK-NEXT: [[I151:%.*]] = getelementptr i8, ptr [[I13]], i64 13 +; CHECK-NEXT: [[I152:%.*]] = load i8, ptr [[I151]], align 1 +; CHECK-NEXT: [[I1535:%.*]] = zext i8 [[I152]] to i32 +; CHECK-NEXT: [[I154:%.*]] = mul i32 [[ARG1]], [[I1535]] +; CHECK-NEXT: [[I1311:%.*]] = or i32 [[I142]], [[I146]] +; CHECK-NEXT: [[I1312:%.*]] = or i32 [[I1311]], [[I150]] +; CHECK-NEXT: [[I1313:%.*]] = or i32 [[I1312]], [[I154]] +; CHECK-NEXT: [[I1536:%.*]] = zext i32 [[I1313]] to i64 +; CHECK-NEXT: ret i64 [[I1536]] +; +bb: + %i2 = getelementptr i8, ptr %arg, i64 %i + %i3 = getelementptr i8, ptr %i2, i64 %i + %i4 = getelementptr i8, ptr %i3, i64 %i + %i5 = getelementptr i8, ptr %i4, i64 %i + %i6 = getelementptr i8, ptr %i5, i64 %i + %i7 = getelementptr i8, ptr %i6, i64 %i + %i8 = getelementptr i8, ptr %i7, i64 %i + %i9 = getelementptr i8, ptr %i8, i64 %i + %i10 = getelementptr i8, ptr %i9, i64 %i + %i11 = getelementptr i8, ptr %i10, i64 %i + %i12 = getelementptr i8, ptr %i11, i64 %i + %i13 = getelementptr i8, ptr %i12, i64 %i + %i14 = getelementptr i8, ptr %i13, i64 %i + %i140 = load i8, ptr %i14, align 1 + %i1412 = zext i8 %i140 to i32 + %i142 = mul i32 %arg1, %i1412 + %i143 = getelementptr i8, ptr %i13, i64 15 + %i144 = load i8, ptr %i143, align 1 + %i1453 = zext i8 %i144 to i32 + %i146 = mul i32 %arg1, %i1453 + %i147 = getelementptr i8, ptr %i13, i64 14 + %i148 = load i8, ptr %i147, align 1 + %i1494 = zext i8 %i148 to i32 + %i150 = mul i32 %arg1, %i1494 + %i151 = getelementptr i8, ptr %i13, i64 13 + %i152 = load i8, ptr %i151, align 1 + %i1535 = zext i8 %i152 to i32 + %i154 = mul i32 %arg1, %i1535 + %i1311 = or i32 %i142, %i146 + %i1312 = or i32 %i1311, %i150 + %i1313 = or i32 %i1312, %i154 + %i1536 = zext i32 %i1313 to i64 + ret i64 %i1536 +}