Skip to content

Commit

Permalink
[LSR] Drop potentially invalid nowrap flags when switching to post-inc IV (PR46943)
Browse files Browse the repository at this point in the history

When LSR converts a branch on the pre-inc IV into a branch on the
post-inc IV, the nowrap flags on the addition may no longer be valid.
Previously, a poison result of the addition might have been ignored,
in which case the program was well defined. After branching on the
post-inc IV, we might be branching on poison, which is undefined behavior.

Fix this by discarding nowrap flags which are not present on the SCEV
expression. Nowrap flags on the SCEV expression are proven by SCEV
to always hold, independently of how the expression will be used.
This is essentially the same fix we applied to IndVars LFTR, which
also performs this kind of pre-inc to post-inc conversion.

I believe a similar problem can also exist for getelementptr inbounds,
but I was not able to come up with a problematic test case. The
inbounds case would have to be addressed differently anyway
(as SCEV does not track this property).

Fixes https://bugs.llvm.org/show_bug.cgi?id=46943.

Differential Revision: https://reviews.llvm.org/D95286
  • Loading branch information
nikic committed Jan 25, 2021
1 parent 15141cd commit 835104a
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 274 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
assert(LatchBlock && "PostInc mode requires a unique loop latch!");
Result = PN->getIncomingValueForBlock(LatchBlock);

// We might be introducing a new use of the post-inc IV that is not poison
// safe, in which case we should drop poison generating flags. Only keep
// those flags for which SCEV has proven that they always hold.
if (isa<OverflowingBinaryOperator>(Result)) {
auto *I = cast<Instruction>(Result);
if (!S->hasNoUnsignedWrap())
I->setHasNoUnsignedWrap(false);
if (!S->hasNoSignedWrap())
I->setHasNoSignedWrap(false);
}

// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
Expand Down
58 changes: 27 additions & 31 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: beq .LBB0_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: subs r5, r3, #1
; CHECK-NEXT: and r7, r3, #3
; CHECK-NEXT: and lr, r3, #3
; CHECK-NEXT: cmp r5, #3
; CHECK-NEXT: bhs .LBB0_6
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: b .LBB0_8
; CHECK-NEXT: .LBB0_4: @ %vector.ph
; CHECK-NEXT: mov.w r12, #0
Expand All @@ -46,44 +46,40 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: letp lr, .LBB0_5
; CHECK-NEXT: b .LBB0_11
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r5, r3, lsr #2
; CHECK-NEXT: sub.w r12, r3, lr
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r1, r3
; CHECK-NEXT: adds r5, r2, r3
; CHECK-NEXT: adds r6, r0, r3
; CHECK-NEXT: adds r3, #16
; CHECK-NEXT: vldr s0, [r4]
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vldr s2, [r5]
; CHECK-NEXT: adds r5, r1, r4
; CHECK-NEXT: adds r6, r2, r4
; CHECK-NEXT: adds r7, r0, r4
; CHECK-NEXT: adds r3, #4
; CHECK-NEXT: vldr s0, [r5]
; CHECK-NEXT: adds r4, #16
; CHECK-NEXT: vldr s2, [r6]
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6]
; CHECK-NEXT: vldr s0, [r4, #4]
; CHECK-NEXT: vldr s2, [r5, #4]
; CHECK-NEXT: vstr s0, [r7]
; CHECK-NEXT: vldr s0, [r5, #4]
; CHECK-NEXT: vldr s2, [r6, #4]
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #4]
; CHECK-NEXT: vldr s0, [r4, #8]
; CHECK-NEXT: vldr s2, [r5, #8]
; CHECK-NEXT: vstr s0, [r7, #4]
; CHECK-NEXT: vldr s0, [r5, #8]
; CHECK-NEXT: vldr s2, [r6, #8]
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #8]
; CHECK-NEXT: vldr s0, [r4, #12]
; CHECK-NEXT: vldr s2, [r5, #12]
; CHECK-NEXT: vstr s0, [r7, #8]
; CHECK-NEXT: vldr s0, [r5, #12]
; CHECK-NEXT: vldr s2, [r6, #12]
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstr s0, [r6, #12]
; CHECK-NEXT: le lr, .LBB0_7
; CHECK-NEXT: vstr s0, [r7, #12]
; CHECK-NEXT: bne .LBB0_7
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: wls lr, r7, .LBB0_11
; CHECK-NEXT: wls lr, lr, .LBB0_11
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
; CHECK-NEXT: mov lr, r7
; CHECK-NEXT: add.w r1, r1, r3, lsl #2
; CHECK-NEXT: add.w r2, r2, r3, lsl #2
; CHECK-NEXT: add.w r0, r0, r3, lsl #2
; CHECK-NEXT: .LBB0_10: @ %for.body.epil
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr s0, [r1]
Expand Down
153 changes: 69 additions & 84 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1459,58 +1459,53 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: cbz r2, .LBB9_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r5, r2, #3
; CHECK-NEXT: and lr, r2, #3
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB9_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: b .LBB9_6
; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: b .LBB9_9
; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new
; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: sub.w r12, r2, lr
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB9_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
; CHECK-NEXT: adds r2, r1, r3
; CHECK-NEXT: vldr.16 s2, [r2, #6]
; CHECK-NEXT: vldr.16 s4, [r4, #6]
; CHECK-NEXT: vldr.16 s6, [r4, #4]
; CHECK-NEXT: vldr.16 s8, [r4, #2]
; CHECK-NEXT: adds r5, r0, r3
; CHECK-NEXT: adds r4, r1, r3
; CHECK-NEXT: vldr.16 s2, [r4, #6]
; CHECK-NEXT: vldr.16 s4, [r5, #6]
; CHECK-NEXT: vldr.16 s6, [r5, #4]
; CHECK-NEXT: vldr.16 s8, [r5, #2]
; CHECK-NEXT: vmul.f16 s2, s4, s2
; CHECK-NEXT: vldr.16 s4, [r2, #4]
; CHECK-NEXT: vldr.16 s10, [r4]
; CHECK-NEXT: vldr.16 s4, [r4, #4]
; CHECK-NEXT: vldr.16 s10, [r5]
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vmul.f16 s4, s6, s4
; CHECK-NEXT: vldr.16 s6, [r2, #2]
; CHECK-NEXT: vldr.16 s6, [r4, #2]
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: vmul.f16 s6, s8, s6
; CHECK-NEXT: vldr.16 s8, [r2]
; CHECK-NEXT: vldr.16 s8, [r4]
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vmul.f16 s8, s10, s8
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB9_5
; CHECK-NEXT: bne .LBB9_5
; CHECK-NEXT: .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: wls lr, r5, .LBB9_9
; CHECK-NEXT: wls lr, lr, .LBB9_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: mov lr, r5
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
; CHECK-NEXT: .LBB9_8: @ %for.body.epil
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s2, [r1]
Expand Down Expand Up @@ -1616,58 +1611,53 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: cbz r2, .LBB10_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r5, r2, #3
; CHECK-NEXT: and lr, r2, #3
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB10_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: b .LBB10_6
; CHECK-NEXT: .LBB10_3:
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: b .LBB10_9
; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new
; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: sub.w r12, r2, lr
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB10_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
; CHECK-NEXT: adds r2, r1, r3
; CHECK-NEXT: vldr.16 s2, [r2, #6]
; CHECK-NEXT: vldr.16 s4, [r4, #6]
; CHECK-NEXT: vldr.16 s6, [r4, #4]
; CHECK-NEXT: vldr.16 s8, [r4, #2]
; CHECK-NEXT: adds r5, r0, r3
; CHECK-NEXT: adds r4, r1, r3
; CHECK-NEXT: vldr.16 s2, [r4, #6]
; CHECK-NEXT: vldr.16 s4, [r5, #6]
; CHECK-NEXT: vldr.16 s6, [r5, #4]
; CHECK-NEXT: vldr.16 s8, [r5, #2]
; CHECK-NEXT: vadd.f16 s2, s4, s2
; CHECK-NEXT: vldr.16 s4, [r2, #4]
; CHECK-NEXT: vldr.16 s10, [r4]
; CHECK-NEXT: vldr.16 s4, [r4, #4]
; CHECK-NEXT: vldr.16 s10, [r5]
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vadd.f16 s4, s6, s4
; CHECK-NEXT: vldr.16 s6, [r2, #2]
; CHECK-NEXT: vldr.16 s6, [r4, #2]
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: vadd.f16 s6, s8, s6
; CHECK-NEXT: vldr.16 s8, [r2]
; CHECK-NEXT: vldr.16 s8, [r4]
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vadd.f16 s8, s10, s8
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB10_5
; CHECK-NEXT: bne .LBB10_5
; CHECK-NEXT: .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: wls lr, r5, .LBB10_9
; CHECK-NEXT: wls lr, lr, .LBB10_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: mov lr, r5
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
; CHECK-NEXT: .LBB10_8: @ %for.body.epil
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s2, [r1]
Expand Down Expand Up @@ -1773,65 +1763,60 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: cbz r2, .LBB11_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
; CHECK-NEXT: and r6, r2, #3
; CHECK-NEXT: and lr, r2, #3
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhs .LBB11_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: b .LBB11_6
; CHECK-NEXT: .LBB11_3:
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: b .LBB11_9
; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new
; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: sub.w r12, r2, lr
; CHECK-NEXT: adds r3, r1, #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: adds r2, r0, #4
; CHECK-NEXT: adds r4, r0, #4
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB11_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-NEXT: vldr.16 s2, [r2, #2]
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmov s4, r4
; CHECK-NEXT: ldrsh r4, [r3], #8
; CHECK-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-NEXT: vldr.16 s2, [r4, #2]
; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: cmp r12, r2
; CHECK-NEXT: vmov s4, r5
; CHECK-NEXT: ldrsh r5, [r3], #8
; CHECK-NEXT: vcvt.f16.s32 s4, s4
; CHECK-NEXT: ldrsh r5, [r3, #-10]
; CHECK-NEXT: ldrsh r6, [r3, #-10]
; CHECK-NEXT: vmul.f16 s2, s2, s4
; CHECK-NEXT: vmov s6, r4
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vmov s6, r5
; CHECK-NEXT: vldr.16 s4, [r4]
; CHECK-NEXT: vcvt.f16.s32 s6, s6
; CHECK-NEXT: ldrsh r4, [r3, #-12]
; CHECK-NEXT: ldrsh r5, [r3, #-12]
; CHECK-NEXT: vmul.f16 s4, s4, s6
; CHECK-NEXT: vmov s8, r5
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
; CHECK-NEXT: vmov s8, r6
; CHECK-NEXT: vldr.16 s6, [r4, #-2]
; CHECK-NEXT: vcvt.f16.s32 s8, s8
; CHECK-NEXT: vmov s10, r4
; CHECK-NEXT: vmov s10, r5
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: vmul.f16 s6, s6, s8
; CHECK-NEXT: vldr.16 s8, [r2, #-4]
; CHECK-NEXT: vldr.16 s8, [r4, #-4]
; CHECK-NEXT: vcvt.f16.s32 s10, s10
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-NEXT: vmul.f16 s8, s8, s10
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-NEXT: adds r2, #8
; CHECK-NEXT: add.w r4, r4, #8
; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB11_5
; CHECK-NEXT: bne .LBB11_5
; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: wls lr, r6, .LBB11_9
; CHECK-NEXT: wls lr, lr, .LBB11_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: mov lr, r6
; CHECK-NEXT: add.w r0, r0, r2, lsl #1
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
; CHECK-NEXT: .LBB11_8: @ %for.body.epil
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r2, [r1], #2
Expand Down
Loading

0 comments on commit 835104a

Please sign in to comment.