Skip to content

Commit 8b400de

Browse files
authored
[X86] Enable TuningSlowDivide64 on Barcelona/Bobcat/Bulldozer/Ryzen Families (#91277)
Despite most AMD CPUs having a lower latency for i64 divisions that converge early, we are still better off testing for values representable as i32 and performing an i32 division if possible. All AMD CPUs appear to have been missed when we added the "idivq-to-divl" attribute - this patch now matches Intel CPU behaviour (and the x86-64/v2/3/4 levels). Unfortunately, the difference in code scheduling means I've had to stop using the update_llc_test_checks script and just use old-fashioned CHECK-DAG checks for divl/divq pairs. Fixes #90985
1 parent e0d8dbc commit 8b400de

File tree

2 files changed

+52
-48
lines changed

2 files changed

+52
-48
lines changed

llvm/lib/Target/X86/X86.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,7 @@ def ProcessorFeatures {
13501350
FeatureCMOV,
13511351
FeatureX86_64];
13521352
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1353+
TuningSlowDivide64,
13531354
TuningSlowSHLD,
13541355
TuningSBBDepBreaking,
13551356
TuningInsertVZEROUPPER];
@@ -1372,6 +1373,7 @@ def ProcessorFeatures {
13721373
list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
13731374
TuningFastScalarShiftMasks,
13741375
TuningFastVectorShiftMasks,
1376+
TuningSlowDivide64,
13751377
TuningSlowSHLD,
13761378
TuningFastImm16,
13771379
TuningSBBDepBreaking,
@@ -1396,6 +1398,7 @@ def ProcessorFeatures {
13961398
TuningFastMOVBE,
13971399
TuningFastImm16,
13981400
TuningSBBDepBreaking,
1401+
TuningSlowDivide64,
13991402
TuningSlowSHLD];
14001403
list<SubtargetFeature> BtVer2Features =
14011404
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1420,6 +1423,7 @@ def ProcessorFeatures {
14201423
FeatureLWP,
14211424
FeatureLAHFSAHF64];
14221425
list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1426+
TuningSlowDivide64,
14231427
TuningFast11ByteNOP,
14241428
TuningFastScalarShiftMasks,
14251429
TuningBranchFusion,
@@ -1500,6 +1504,7 @@ def ProcessorFeatures {
15001504
TuningFastVariablePerLaneShuffle,
15011505
TuningFastMOVBE,
15021506
TuningFastImm16,
1507+
TuningSlowDivide64,
15031508
TuningSlowSHLD,
15041509
TuningSBBDepBreaking,
15051510
TuningInsertVZEROUPPER,

llvm/test/CodeGen/X86/bypass-slow-division-64.ll

Lines changed: 47 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
21
; Check that 64-bit division is bypassed correctly.
32
; RUN: llc < %s -mtriple=x86_64-- -mattr=-idivq-to-divl | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
43
; RUN: llc < %s -mtriple=x86_64-- -mattr=+idivq-to-divl | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
@@ -13,17 +12,17 @@
1312
; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
1413
; RUN: llc < %s -mtriple=x86_64-- -mcpu=alderlake | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
1514
; AMD
16-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=barcelona | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
17-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
18-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
19-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
20-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
21-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
22-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
23-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
24-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
25-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
26-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
15+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=barcelona | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
16+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
17+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
18+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
19+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
20+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
21+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
22+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
23+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
24+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
25+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
2726

2827
; Additional tests for 64-bit divide bypass
2928

@@ -41,18 +40,18 @@ define i64 @sdiv_quotient(i64 %a, i64 %b) nounwind {
4140
;
4241
; SLOW-DIVQ-LABEL: sdiv_quotient:
4342
; SLOW-DIVQ: # %bb.0:
44-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
45-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
46-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
47-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
43+
; SLOW-DIVQ-DAG: movq %rdi, %rax
44+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
45+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
46+
; SLOW-DIVQ-DAG: shrq $32, %rcx
4847
; SLOW-DIVQ-NEXT: je .LBB0_1
4948
; SLOW-DIVQ-NEXT: # %bb.2:
5049
; SLOW-DIVQ-NEXT: cqto
5150
; SLOW-DIVQ-NEXT: idivq %rsi
5251
; SLOW-DIVQ-NEXT: retq
5352
; SLOW-DIVQ-NEXT: .LBB0_1:
54-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
55-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
53+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
54+
; SLOW-DIVQ-DAG: xorl %edx, %edx
5655
; SLOW-DIVQ-NEXT: divl %esi
5756
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
5857
; SLOW-DIVQ-NEXT: retq
@@ -93,19 +92,19 @@ define i64 @sdiv_remainder(i64 %a, i64 %b) nounwind {
9392
;
9493
; SLOW-DIVQ-LABEL: sdiv_remainder:
9594
; SLOW-DIVQ: # %bb.0:
96-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
97-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
98-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
99-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
95+
; SLOW-DIVQ-DAG: movq %rdi, %rax
96+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
97+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
98+
; SLOW-DIVQ-DAG: shrq $32, %rcx
10099
; SLOW-DIVQ-NEXT: je .LBB3_1
101100
; SLOW-DIVQ-NEXT: # %bb.2:
102101
; SLOW-DIVQ-NEXT: cqto
103102
; SLOW-DIVQ-NEXT: idivq %rsi
104103
; SLOW-DIVQ-NEXT: movq %rdx, %rax
105104
; SLOW-DIVQ-NEXT: retq
106105
; SLOW-DIVQ-NEXT: .LBB3_1:
107-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
108-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
106+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
107+
; SLOW-DIVQ-DAG: xorl %edx, %edx
109108
; SLOW-DIVQ-NEXT: divl %esi
110109
; SLOW-DIVQ-NEXT: movl %edx, %eax
111110
; SLOW-DIVQ-NEXT: retq
@@ -148,19 +147,19 @@ define i64 @sdiv_quotient_and_remainder(i64 %a, i64 %b) nounwind {
148147
;
149148
; SLOW-DIVQ-LABEL: sdiv_quotient_and_remainder:
150149
; SLOW-DIVQ: # %bb.0:
151-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
152-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
153-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
154-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
150+
; SLOW-DIVQ-DAG: movq %rdi, %rax
151+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
152+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
153+
; SLOW-DIVQ-DAG: shrq $32, %rcx
155154
; SLOW-DIVQ-NEXT: je .LBB6_1
156155
; SLOW-DIVQ-NEXT: # %bb.2:
157156
; SLOW-DIVQ-NEXT: cqto
158157
; SLOW-DIVQ-NEXT: idivq %rsi
159158
; SLOW-DIVQ-NEXT: addq %rdx, %rax
160159
; SLOW-DIVQ-NEXT: retq
161160
; SLOW-DIVQ-NEXT: .LBB6_1:
162-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
163-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
161+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
162+
; SLOW-DIVQ-DAG: xorl %edx, %edx
164163
; SLOW-DIVQ-NEXT: divl %esi
165164
; SLOW-DIVQ-NEXT: # kill: def $edx killed $edx def $rdx
166165
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
@@ -214,18 +213,18 @@ define i64 @udiv_quotient(i64 %a, i64 %b) nounwind {
214213
;
215214
; SLOW-DIVQ-LABEL: udiv_quotient:
216215
; SLOW-DIVQ: # %bb.0:
217-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
218-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
219-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
220-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
216+
; SLOW-DIVQ-DAG: movq %rdi, %rax
217+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
218+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
219+
; SLOW-DIVQ-DAG: shrq $32, %rcx
221220
; SLOW-DIVQ-NEXT: je .LBB9_1
222221
; SLOW-DIVQ-NEXT: # %bb.2:
223222
; SLOW-DIVQ-NEXT: xorl %edx, %edx
224223
; SLOW-DIVQ-NEXT: divq %rsi
225224
; SLOW-DIVQ-NEXT: retq
226225
; SLOW-DIVQ-NEXT: .LBB9_1:
227-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
228-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
226+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
227+
; SLOW-DIVQ-DAG: xorl %edx, %edx
229228
; SLOW-DIVQ-NEXT: divl %esi
230229
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
231230
; SLOW-DIVQ-NEXT: retq
@@ -266,19 +265,19 @@ define i64 @udiv_remainder(i64 %a, i64 %b) nounwind {
266265
;
267266
; SLOW-DIVQ-LABEL: udiv_remainder:
268267
; SLOW-DIVQ: # %bb.0:
269-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
270-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
271-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
272-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
268+
; SLOW-DIVQ-DAG: movq %rdi, %rax
269+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
270+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
271+
; SLOW-DIVQ-DAG: shrq $32, %rcx
273272
; SLOW-DIVQ-NEXT: je .LBB12_1
274273
; SLOW-DIVQ-NEXT: # %bb.2:
275274
; SLOW-DIVQ-NEXT: xorl %edx, %edx
276275
; SLOW-DIVQ-NEXT: divq %rsi
277276
; SLOW-DIVQ-NEXT: movq %rdx, %rax
278277
; SLOW-DIVQ-NEXT: retq
279278
; SLOW-DIVQ-NEXT: .LBB12_1:
280-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
281-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
279+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
280+
; SLOW-DIVQ-DAG: xorl %edx, %edx
282281
; SLOW-DIVQ-NEXT: divl %esi
283282
; SLOW-DIVQ-NEXT: movl %edx, %eax
284283
; SLOW-DIVQ-NEXT: retq
@@ -321,19 +320,19 @@ define i64 @udiv_quotient_and_remainder(i64 %a, i64 %b) nounwind {
321320
;
322321
; SLOW-DIVQ-LABEL: udiv_quotient_and_remainder:
323322
; SLOW-DIVQ: # %bb.0:
324-
; SLOW-DIVQ-NEXT: movq %rdi, %rax
325-
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
326-
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
327-
; SLOW-DIVQ-NEXT: shrq $32, %rcx
323+
; SLOW-DIVQ-DAG: movq %rdi, %rax
324+
; SLOW-DIVQ-DAG: movq %rdi, %rcx
325+
; SLOW-DIVQ-DAG: orq %rsi, %rcx
326+
; SLOW-DIVQ-DAG: shrq $32, %rcx
328327
; SLOW-DIVQ-NEXT: je .LBB15_1
329328
; SLOW-DIVQ-NEXT: # %bb.2:
330329
; SLOW-DIVQ-NEXT: xorl %edx, %edx
331330
; SLOW-DIVQ-NEXT: divq %rsi
332331
; SLOW-DIVQ-NEXT: addq %rdx, %rax
333332
; SLOW-DIVQ-NEXT: retq
334333
; SLOW-DIVQ-NEXT: .LBB15_1:
335-
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
336-
; SLOW-DIVQ-NEXT: xorl %edx, %edx
334+
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
335+
; SLOW-DIVQ-DAG: xorl %edx, %edx
337336
; SLOW-DIVQ-NEXT: divl %esi
338337
; SLOW-DIVQ-NEXT: # kill: def $edx killed $edx def $rdx
339338
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax

0 commit comments

Comments
 (0)