diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h index 3e1c6e0fcdc6..f38ec2debb18 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h @@ -25,10 +25,12 @@ namespace llvm { -static constexpr unsigned InstCombineDefaultMaxIterations = 1000; +static constexpr unsigned InstCombineDefaultMaxIterations = 1; struct InstCombineOptions { bool UseLoopInfo = false; + // Verify that a fix point has been reached after MaxIterations. + bool VerifyFixpoint = false; unsigned MaxIterations = InstCombineDefaultMaxIterations; InstCombineOptions() = default; @@ -38,6 +40,11 @@ struct InstCombineOptions { return *this; } + InstCombineOptions &setVerifyFixpoint(bool Value) { + VerifyFixpoint = Value; + return *this; + } + InstCombineOptions &setMaxIterations(unsigned Value) { MaxIterations = Value; return *this; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d0cbbcc0e310..bb6b40b14f8b 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -845,6 +845,9 @@ Expected parseSimplifyCFGOptions(StringRef Params) { Expected parseInstCombineOptions(StringRef Params) { InstCombineOptions Result; + // When specifying "instcombine" in -passes enable fix-point verification by + // default, as this is what most tests should use. 
+ Result.setVerifyFixpoint(true); while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); @@ -852,6 +855,8 @@ Expected parseInstCombineOptions(StringRef Params) { bool Enable = !ParamName.consume_front("no-"); if (ParamName == "use-loop-info") { Result.setUseLoopInfo(Enable); + } else if (ParamName == "verify-fixpoint") { + Result.setVerifyFixpoint(Enable); } else if (Enable && ParamName.consume_front("max-iterations=")) { APInt MaxIterations; if (ParamName.getAsInteger(0, MaxIterations)) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index e10dc995c493..54b0e8a7b6e8 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -325,7 +325,6 @@ FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("gvn-sink", GVNSinkPass()) FUNCTION_PASS("helloworld", HelloWorldPass()) FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass()) -FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("instcount", InstCountPass()) FUNCTION_PASS("instsimplify", InstSimplifyPass()) FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass()) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 13ac11f3c124..c9e0865b200a 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -146,6 +146,8 @@ static cl::opt MaxSinkNumUsers( "instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking")); +// FIXME: Remove this option, it has been superseded by verify-fixpoint. +// Only keeping it for now to avoid unnecessary test churn in this patch. 
static cl::opt InfiniteLoopDetectionThreshold( "instcombine-infinite-loop-threshold", cl::desc("Number of instruction combining iterations considered an " @@ -4249,7 +4251,8 @@ static bool combineInstructionsOverFunction( Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) { + ProfileSummaryInfo *PSI, unsigned MaxIterations, bool VerifyFixpoint, + LoopInfo *LI) { auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new @@ -4273,35 +4276,35 @@ static bool combineInstructionsOverFunction( // Iterate while there is work to do. unsigned Iteration = 0; while (true) { - ++NumWorklistIterations; ++Iteration; - if (Iteration > InfiniteLoopDetectionThreshold) { - report_fatal_error( - "Instruction Combining seems stuck in an infinite loop after " + - Twine(InfiniteLoopDetectionThreshold) + " iterations."); - } - - if (Iteration > MaxIterations) { + if (Iteration > MaxIterations && !VerifyFixpoint) { LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations << " on " << F.getName() - << " reached; stopping before reaching a fixpoint\n"); + << " reached; stopping without verifying fixpoint\n"); break; } + ++NumWorklistIterations; LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist, RPOT); + bool MadeChangeInThisIteration = + prepareICWorklistFromFunction(F, DL, &TLI, Worklist, RPOT); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; - - if (!IC.run()) + MadeChangeInThisIteration |= IC.run(); + if (!MadeChangeInThisIteration) break; MadeIRChange = true; + if (Iteration > MaxIterations) { + 
report_fatal_error( + "Instruction Combining did not reach a fixpoint after " + + Twine(MaxIterations) + " iterations"); + } } if (Iteration == 1) @@ -4324,7 +4327,8 @@ void InstCombinePass::printPipeline( OS, MapClassName2PassName); OS << '<'; OS << "max-iterations=" << Options.MaxIterations << ";"; - OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info"; + OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;"; + OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint"; OS << '>'; } @@ -4350,7 +4354,8 @@ PreservedAnalyses InstCombinePass::run(Function &F, &AM.getResult(F) : nullptr; if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, - BFI, PSI, Options.MaxIterations, LI)) + BFI, PSI, Options.MaxIterations, + Options.VerifyFixpoint, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -4400,7 +4405,8 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, BFI, PSI, - InstCombineDefaultMaxIterations, LI); + InstCombineDefaultMaxIterations, + /*VerifyFixpoint */ false, LI); } char InstructionCombiningPass::ID = 0; diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll index 5beb0c7cadfb..00c66eeb5995 100644 --- a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll +++ b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes='instcombine' -S | FileCheck %s + +; FIXME: This does not currently reach a fix point, because an assume can only +; be propagated backwards after its argument has been simplified. 
define i32 @computeNumSignBits_add1(i32 %in) { ; CHECK-LABEL: @computeNumSignBits_add1( @@ -48,7 +51,7 @@ define i32 @computeNumSignBits_sub1(i32 %in) { define i32 @computeNumSignBits_sub2(i32 %in) { ; CHECK-LABEL: @computeNumSignBits_sub2( -; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[IN:%.*]], -1 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[IN:%.*]], -1 ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[SUB]], 43 ; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) ; CHECK-NEXT: [[SH:%.*]] = shl nuw nsw i32 [[SUB]], 3 diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll index 59c204d0736f..c934f64d2a58 100644 --- a/llvm/test/Other/new-pm-print-pipeline.ll +++ b/llvm/test/Other/new-pm-print-pipeline.ll @@ -95,8 +95,8 @@ ; CHECK-27: function(separate-const-offset-from-gep) ;; Test InstCombine options - the first pass checks default settings, and the second checks customized options. -; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28 -; CHECK-28: function(instcombine,instcombine) +; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28 +; CHECK-28: function(instcombine,instcombine) ;; Test function-attrs ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='cgscc(function-attrs)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-29 diff --git a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll index a4acab5d767a..cfef2540e455 100644 --- a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll +++ b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll @@ -1,7 +1,10 @@ -; RUN: opt < %s -passes=instcombine -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -passes='instcombine' -S -debug 2>&1 | 
FileCheck %s ; REQUIRES: asserts target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +; This test disables fixpoint verification, because that would cause a second +; iteration for verification. + define i32 @a() nounwind readnone { entry: ret i32 zext (i1 icmp eq (i32 0, i32 ptrtoint (ptr @a to i32)) to i32) diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll index d70508117d1b..e60d80cdf2da 100644 --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s +; RUN: opt < %s -passes='instcombine' -S | FileCheck %s + +; FIXME: This currently doesn't reach a fix point, because we don't +; canonicalize the operand order of newly added phi nodes. 
@var_7 = external global i8, align 1 @var_1 = external global i32, align 4 @@ -28,11 +31,12 @@ define void @_Z4testv() { ; CHECK-NEXT: br label [[BB12]] ; CHECK: bb12: ; CHECK-NEXT: [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 1, [[BB9]] ], [ [[I11]], [[BB10]] ] ; CHECK-NEXT: store i32 [[STOREMERGE1]], ptr @arr_2, align 4 ; CHECK-NEXT: store i16 [[I4]], ptr @arr_4, align 2 ; CHECK-NEXT: [[I8:%.*]] = sext i16 [[I4]] to i32 ; CHECK-NEXT: store i32 [[I8]], ptr @arr_3, align 16 -; CHECK-NEXT: store i32 [[STOREMERGE1]], ptr getelementptr inbounds ([0 x i32], ptr @arr_2, i64 0, i64 1), align 4 +; CHECK-NEXT: store i32 [[STOREMERGE]], ptr getelementptr inbounds ([0 x i32], ptr @arr_2, i64 0, i64 1), align 4 ; CHECK-NEXT: store i16 [[I4]], ptr getelementptr inbounds ([0 x i16], ptr @arr_4, i64 0, i64 1), align 2 ; CHECK-NEXT: store i32 [[I8]], ptr getelementptr inbounds ([8 x i32], ptr @arr_3, i64 0, i64 1), align 4 ; CHECK-NEXT: ret void @@ -275,17 +279,16 @@ sink: } define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) { -; CHECK-LABEL: define ptr @inttoptr_merge -; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-LABEL: @inttoptr_merge( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: BB0: -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A:%.*]] to ptr ; CHECK-NEXT: br label [[SINK:%.*]] ; CHECK: BB1: ; CHECK-NEXT: br label [[SINK]] ; CHECK: sink: -; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ] +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B:%.*]], [[BB1]] ], [ [[TMP0]], [[BB0]] ] ; CHECK-NEXT: ret ptr [[STOREMERGE]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/pr55228.ll b/llvm/test/Transforms/InstCombine/pr55228.ll index 0c7ea432a946..5e34c074346e 100644 --- 
a/llvm/test/Transforms/InstCombine/pr55228.ll +++ b/llvm/test/Transforms/InstCombine/pr55228.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=instcombine < %s | FileCheck %s +; RUN: opt -S -passes='instcombine' < %s | FileCheck %s + +; This does not reach a fixpoint, because the global initializer is not in +; folded form. This will not happen if preceded by a GlobalOpt run. target datalayout = "p:8:8" @@ -8,7 +11,7 @@ target datalayout = "p:8:8" define i1 @test(ptr %p) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i8 1) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i64 1) ; CHECK-NEXT: ret i1 [[CMP]] ; %alloca = alloca ptr diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 110c46ecbbfc..8dd8004cb9cd 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes='instcombine' -S | FileCheck %s + +; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint, +; because a logical and is not relaxed to a bitwise and in one iteration. 
declare void @use(i64) declare void @use_i32(i32) @@ -1719,7 +1722,7 @@ define void @ashr_out_of_range(ptr %A) { ; CHECK-NEXT: [[L7:%.*]] = load i177, ptr [[G11]], align 4 ; CHECK-NEXT: [[L7_FROZEN:%.*]] = freeze i177 [[L7]] ; CHECK-NEXT: [[C171:%.*]] = icmp slt i177 [[L7_FROZEN]], 0 -; CHECK-NEXT: [[C17:%.*]] = and i1 [[TMP1]], [[C171]] +; CHECK-NEXT: [[C17:%.*]] = select i1 [[TMP1]], i1 [[C171]], i1 false ; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C17]] to i64 ; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i177 [[L7_FROZEN]], -1 diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index 43cb6a2224de..a9e204d08494 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='require,function(chr,instcombine,simplifycfg)' -S | FileCheck %s +; RUN: opt < %s -passes='require,function(chr,instcombine,simplifycfg)' -S | FileCheck %s + +; FIXME: This does not currently reach a fix point, because we don't make use +; of a freeze that is pushed up the instruction chain later. 
declare void @foo() declare void @bar() @@ -1932,13 +1935,13 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[J_FR:%.*]] = freeze i64 [[J:%.*]] ; CHECK-NEXT: [[I_FR:%.*]] = freeze i64 [[I:%.*]] -; CHECK-NEXT: [[K_FR:%.*]] = freeze i64 [[K:%.*]] -; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i64 [[J_FR]], [[K_FR]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i64 [[J_FR]], [[K:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = freeze i1 [[CMP0]] ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[I_FR]], [[J_FR]] ; CHECK-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[I_FR]], 86 -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP0]], [[CMP3]] -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP_I]] -; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[CMP_I]] +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]] ; CHECK: bb1: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[I_FR]], 2 ; CHECK-NEXT: switch i64 [[I_FR]], label [[BB2:%.*]] [ @@ -1962,7 +1965,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB10:%.*]] ; CHECK: entry.split.nonchr: -; CHECK-NEXT: br i1 [[CMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof [[PROF18]] +; CHECK-NEXT: br i1 [[TMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof [[PROF18]] ; CHECK: bb1.nonchr: ; CHECK-NEXT: [[CMP2_NONCHR:%.*]] = icmp eq i64 [[I_FR]], 2 ; CHECK-NEXT: br i1 [[CMP2_NONCHR]], label [[BB3_NONCHR:%.*]], label [[BB2_NONCHR:%.*]], !prof [[PROF16]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index 8096f8c8eb97..2db35c4e03e4 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ 
b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -115,8 +115,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] ; CHECK: for.body4.us.1: ; CHECK-NEXT: [[K_011_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ] -; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[K_011_US_1]], 15 -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[CONV_US_1:%.*]] = zext i32 [[K_011_US_1]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[CONV_US_1]], 15 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[K_011_US_1]], 210 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP9]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP8]] @@ -138,8 +138,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] ; CHECK: for.body4.us.2: ; CHECK-NEXT: [[K_011_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ] -; CHECK-NEXT: [[NARROW14:%.*]] = add nuw nsw i32 [[K_011_US_2]], 30 -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[NARROW14]] to i64 +; CHECK-NEXT: [[CONV_US_2:%.*]] = zext i32 [[K_011_US_2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[CONV_US_2]], 30 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i32 [[K_011_US_2]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP15]] @@ -161,8 +161,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] ; CHECK: for.body4.us.3: ; CHECK-NEXT: [[K_011_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ] -; CHECK-NEXT: [[NARROW15:%.*]] = add nuw nsw 
i32 [[K_011_US_3]], 45 -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[NARROW15]] to i64 +; CHECK-NEXT: [[CONV_US_3:%.*]] = zext i32 [[K_011_US_3]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[CONV_US_3]], 45 ; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i32 [[K_011_US_3]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP22]]