Skip to content

Commit 654bb4e

Browse files
committed
[LV] Don't consider branches leaving loop in collectValuesToIgnore.
Branches exiting the loop will remain regardless, so don't consider them in collectValuesToIgnore. This fixes another divergence between the legacy and VPlan-based cost models. Fixes #106780.
1 parent 5aa83eb commit 654bb4e

File tree

2 files changed

+162
-0
lines changed

2 files changed

+162
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6817,6 +6817,9 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
68176817
if (auto *Br = dyn_cast_or_null<BranchInst>(Op)) {
68186818
BasicBlock *ThenBB = Br->getSuccessor(0);
68196819
BasicBlock *ElseBB = Br->getSuccessor(1);
6820+
// Don't consider branches leaving the loop for simplification.
6821+
if (!TheLoop->contains(ThenBB) || !TheLoop->contains(ElseBB))
6822+
continue;
68206823
bool ThenEmpty = IsEmptyBlock(ThenBB);
68216824
bool ElseEmpty = IsEmptyBlock(ElseBB);
68226825
if ((ThenEmpty && ElseEmpty) ||

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,158 @@ exit:
175175
ret i8 %r
176176
}
177177

178+
declare i16 @llvm.umax.i16(i16, i16)
179+
180+
; Test case for https://github.com/llvm/llvm-project/issues/106780.
181+
define i32 @cost_of_exit_branch_and_cond_insts(ptr %a, ptr %b, i1 %c, i16 %x) #0 {
182+
; CHECK-LABEL: define i32 @cost_of_exit_branch_and_cond_insts(
183+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR0]] {
184+
; CHECK-NEXT: [[ENTRY:.*]]:
185+
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[X]] to i32
186+
; CHECK-NEXT: [[UMAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 111)
187+
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 770, [[UMAX3]]
188+
; CHECK-NEXT: [[SMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
189+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[SMAX4]], 1
190+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 24
191+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
192+
; CHECK: [[VECTOR_MEMCHECK]]:
193+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1
194+
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[X]] to i32
195+
; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP3]], i32 111)
196+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 770, [[UMAX1]]
197+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 0)
198+
; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[SMAX]] to i64
199+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
200+
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4
201+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
202+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]]
203+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
204+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
205+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
206+
; CHECK: [[VECTOR_PH]]:
207+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 8
208+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
209+
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 8, i32 [[N_MOD_VF]]
210+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP9]]
211+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C]], i64 0
212+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer
213+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
214+
; CHECK: [[VECTOR_BODY]]:
215+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE18:.*]] ]
216+
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0
217+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[B]], i32 [[TMP10]]
218+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 0
219+
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
220+
; CHECK: [[PRED_STORE_IF]]:
221+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]]
222+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
223+
; CHECK: [[PRED_STORE_CONTINUE]]:
224+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 1
225+
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
226+
; CHECK: [[PRED_STORE_IF5]]:
227+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
228+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
229+
; CHECK: [[PRED_STORE_CONTINUE6]]:
230+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 2
231+
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
232+
; CHECK: [[PRED_STORE_IF7]]:
233+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
234+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
235+
; CHECK: [[PRED_STORE_CONTINUE8]]:
236+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 3
237+
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
238+
; CHECK: [[PRED_STORE_IF9]]:
239+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
240+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
241+
; CHECK: [[PRED_STORE_CONTINUE10]]:
242+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 4
243+
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
244+
; CHECK: [[PRED_STORE_IF11]]:
245+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
246+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
247+
; CHECK: [[PRED_STORE_CONTINUE12]]:
248+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 5
249+
; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
250+
; CHECK: [[PRED_STORE_IF13]]:
251+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
252+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE14]]
253+
; CHECK: [[PRED_STORE_CONTINUE14]]:
254+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 6
255+
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
256+
; CHECK: [[PRED_STORE_IF15]]:
257+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
258+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
259+
; CHECK: [[PRED_STORE_CONTINUE16]]:
260+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 7
261+
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18]]
262+
; CHECK: [[PRED_STORE_IF17]]:
263+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1, !alias.scope [[META11]], !noalias [[META14]]
264+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
265+
; CHECK: [[PRED_STORE_CONTINUE18]]:
266+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
267+
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> zeroinitializer, ptr [[TMP20]], i32 4, <8 x i1> [[BROADCAST_SPLAT]]), !alias.scope [[META14]]
268+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
269+
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
270+
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
271+
; CHECK: [[MIDDLE_BLOCK]]:
272+
; CHECK-NEXT: br label %[[SCALAR_PH]]
273+
; CHECK: [[SCALAR_PH]]:
274+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
275+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
276+
; CHECK: [[LOOP_HEADER]]:
277+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
278+
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_EXITING:.*]]
279+
; CHECK: [[THEN]]:
280+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]]
281+
; CHECK-NEXT: store i1 false, ptr [[A]], align 1
282+
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
283+
; CHECK-NEXT: br label %[[LOOP_EXITING]]
284+
; CHECK: [[LOOP_EXITING]]:
285+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
286+
; CHECK-NEXT: [[UMAX:%.*]] = tail call i16 @llvm.umax.i16(i16 [[X]], i16 111)
287+
; CHECK-NEXT: [[UMAX_EXT:%.*]] = zext i16 [[UMAX]] to i32
288+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 770, [[UMAX_EXT]]
289+
; CHECK-NEXT: [[EC:%.*]] = icmp slt i32 [[IV]], [[SUB]]
290+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_LATCH]], label %[[EXIT:.*]]
291+
; CHECK: [[LOOP_LATCH]]:
292+
; CHECK-NEXT: br label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
293+
; CHECK: [[EXIT]]:
294+
; CHECK-NEXT: br label %[[RETURN:.*]]
295+
; CHECK: [[RETURN]]:
296+
; CHECK-NEXT: ret i32 0
297+
;
298+
entry:
299+
br label %loop.header
300+
301+
loop.header:
302+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
303+
br i1 %c, label %then, label %loop.exiting
304+
305+
then:
306+
%gep = getelementptr inbounds i32, ptr %b, i32 %iv
307+
store i1 false, ptr %a, align 1
308+
store i32 0, ptr %gep, align 4
309+
br label %loop.exiting
310+
311+
loop.exiting:
312+
%iv.next = add i32 %iv, 1
313+
%umax = tail call i16 @llvm.umax.i16(i16 %x, i16 111)
314+
%umax.ext = zext i16 %umax to i32
315+
%sub = sub i32 770, %umax.ext
316+
%ec = icmp slt i32 %iv, %sub
317+
br i1 %ec, label %loop.latch, label %exit
318+
319+
loop.latch:
320+
br label %loop.header
321+
322+
exit:
323+
br label %return
324+
325+
return:
326+
ret i32 0
327+
}
328+
329+
178330

179331
;.
180332
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
@@ -188,4 +340,11 @@ exit:
188340
; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
189341
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
190342
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
343+
; CHECK: [[META11]] = !{[[META12:![0-9]+]]}
344+
; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]}
345+
; CHECK: [[META13]] = distinct !{[[META13]], !"LVerDomain"}
346+
; CHECK: [[META14]] = !{[[META15:![0-9]+]]}
347+
; CHECK: [[META15]] = distinct !{[[META15]], [[META13]]}
348+
; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
349+
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
191350
;.

0 commit comments

Comments
 (0)