Skip to content

Commit eda7a86

Browse files
committed
[BypassSlowDivision] Improve our handling of divisions by constants
Summary: Don't bail out on constant divisors for divisions that can be narrowed without introducing control flow. This gives us a 32-bit multiply instead of an emulated 64-bit multiply in the generated PTX assembly. Reviewers: jlebar Subscribers: jholewinski, mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D38265 llvm-svn: 314253
1 parent bbfa246 commit eda7a86

File tree

2 files changed

+90
-7
lines changed

2 files changed

+90
-7
lines changed

llvm/lib/Transforms/Utils/BypassSlowDivision.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -339,11 +339,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
339339
Value *Dividend = SlowDivOrRem->getOperand(0);
340340
Value *Divisor = SlowDivOrRem->getOperand(1);
341341

342-
if (isa<ConstantInt>(Divisor)) {
343-
// Keep division by a constant for DAGCombiner.
344-
return None;
345-
}
346-
347342
VisitedSetTy SetL;
348343
ValueRange DividendRange = getValueRange(Dividend, SetL);
349344
if (DividendRange == VALRNG_LIKELY_LONG)
@@ -359,7 +354,9 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
359354

360355
if (DividendShort && DivisorShort) {
361356
// If both operands are known to be short then just replace the long
362-
// division with a short one in-place.
357+
// division with a short one in-place. Since we're not introducing control
358+
// flow in this case, narrowing the division is always a win, even if the
359+
// divisor is a constant (and will later get replaced by a multiplication).
363360

364361
IRBuilder<> Builder(SlowDivOrRem);
365362
Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
@@ -369,7 +366,16 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
369366
Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
370367
Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
371368
return QuotRemPair(ExtDiv, ExtRem);
372-
} else if (DividendShort && !isSignedOp()) {
369+
}
370+
371+
if (isa<ConstantInt>(Divisor)) {
372+
// If the divisor is a constant, DAGCombiner will convert it to a
373+
// multiplication by a magic constant. It isn't clear if it is worth
374+
// introducing control flow to get a narrower multiply.
375+
return None;
376+
}
377+
378+
if (DividendShort && !isSignedOp()) {
373379
// If the division is unsigned and Dividend is known to be short, then
374380
// either
375381
// 1) Divisor is less or equal to Dividend, and the result can be computed

llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,80 @@ define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
2727
store i64 %d, i64* %retptr
2828
ret void
2929
}
30+
31+
; CHECK-LABEL: @udiv_by_constant(
32+
define i64 @udiv_by_constant(i32 %a) {
33+
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
34+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
35+
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], 50
36+
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
37+
; CHECK-NEXT: ret i64 [[TMP3]]
38+
39+
%a.zext = zext i32 %a to i64
40+
%wide.div = udiv i64 %a.zext, 50
41+
ret i64 %wide.div
42+
}
43+
44+
; CHECK-LABEL: @urem_by_constant(
45+
define i64 @urem_by_constant(i32 %a) {
46+
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
47+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
48+
; CHECK-NEXT: [[TMP2:%.*]] = urem i32 [[TMP1]], 50
49+
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
50+
; CHECK-NEXT: ret i64 [[TMP3]]
51+
52+
%a.zext = zext i32 %a to i64
53+
%wide.div = urem i64 %a.zext, 50
54+
ret i64 %wide.div
55+
}
56+
57+
; Negative test: instead of emitting a runtime check on %a, we prefer to let the
58+
; DAGCombiner transform this division by constant into a multiplication (with a
59+
; "magic constant").
60+
;
61+
; CHECK-LABEL: @udiv_by_constant_negative_0(
62+
define i64 @udiv_by_constant_negative_0(i64 %a) {
63+
; CHECK-NEXT: [[WIDE_DIV:%.*]] = udiv i64 [[A:%.*]], 50
64+
; CHECK-NEXT: ret i64 [[WIDE_DIV]]
65+
66+
%wide.div = udiv i64 %a, 50
67+
ret i64 %wide.div
68+
}
69+
70+
; Negative test: while we know the dividend is short, the divisor isn't. This
71+
; test is here for completeness, but instcombine will optimize this to return 0.
72+
;
73+
; CHECK-LABEL: @udiv_by_constant_negative_1(
74+
define i64 @udiv_by_constant_negative_1(i32 %a) {
75+
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
76+
; CHECK-NEXT: [[WIDE_DIV:%.*]] = udiv i64 [[A_ZEXT]], 8589934592
77+
; CHECK-NEXT: ret i64 [[WIDE_DIV]]
78+
79+
%a.zext = zext i32 %a to i64
80+
%wide.div = udiv i64 %a.zext, 8589934592 ;; == 1 << 33
81+
ret i64 %wide.div
82+
}
83+
84+
; URem version of udiv_by_constant_negative_0
85+
;
86+
; CHECK-LABEL: @urem_by_constant_negative_0(
87+
define i64 @urem_by_constant_negative_0(i64 %a) {
88+
; CHECK-NEXT: [[WIDE_DIV:%.*]] = urem i64 [[A:%.*]], 50
89+
; CHECK-NEXT: ret i64 [[WIDE_DIV]]
90+
91+
%wide.div = urem i64 %a, 50
92+
ret i64 %wide.div
93+
}
94+
95+
; URem version of udiv_by_constant_negative_1
96+
;
97+
; CHECK-LABEL: @urem_by_constant_negative_1(
98+
define i64 @urem_by_constant_negative_1(i32 %a) {
99+
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
100+
; CHECK-NEXT: [[WIDE_DIV:%.*]] = urem i64 [[A_ZEXT]], 8589934592
101+
; CHECK-NEXT: ret i64 [[WIDE_DIV]]
102+
103+
%a.zext = zext i32 %a to i64
104+
%wide.div = urem i64 %a.zext, 8589934592 ;; == 1 << 33
105+
ret i64 %wide.div
106+
}

0 commit comments

Comments
 (0)