Skip to content

Commit 8f3d2fd

Browse files
committed
[PowerPC] Fix lowering when performing conditional jumps on f128 or f16 values
1 parent 3531cc1 commit 8f3d2fd

File tree

4 files changed

+229
-1
lines changed

4 files changed

+229
-1
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
232232
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
233233
setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
234234

235+
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
235236
if (Subtarget.isISA3_0()) {
236237
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
237238
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
@@ -1312,7 +1313,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
13121313
setOperationAction(ISD::SETCC, MVT::f128, Custom);
13131314
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
13141315
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1315-
setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1316+
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
13161317

13171318
// Lower following f128 select_cc pattern:
13181319
// select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
@@ -8236,6 +8237,36 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
82368237
return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
82378238
}
82388239

8240+
/// LowerBR_CC - Custom-lower a BR_CC whose compared operands are f128 or f16.
///
/// Operands of \p Op, as read below: 0 = chain, 1 = condition code, 2 = LHS,
/// 3 = RHS, 4 = branch destination.
///
/// f128 operands have their comparison softened with a libcall through
/// softenSetCCOperands; f16 operands are converted to f32 and the branch is
/// re-emitted on the converted values. In both cases the returned node is a
/// BR_CC, so the caller always gets back a branch.
SDValue PPCTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the cc condition with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "BR_CC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain);
    // softenSetCCOperands may leave RHS empty, in which case LHS alone carries
    // the comparison result. That value is not a branch, so it must not be
    // returned as the lowering of BR_CC; branch on it being non-zero instead.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
    return DAG.getNode(ISD::BR_CC, dl, Op.getValueType(), Chain,
                       DAG.getCondCode(CC), LHS, RHS, Dest);
  }

  // Compare f16 operands as f32 and branch on the converted values.
  if (LHSVT == MVT::f16) {
    LHS = DAG.getFPExtendOrRound(LHS, dl, MVT::f32);
    RHS = DAG.getFPExtendOrRound(RHS, dl, MVT::f32);
    return DAG.getNode(ISD::BR_CC, dl, Op.getValueType(), Chain,
                       DAG.getCondCode(CC), LHS, RHS, Dest);
  }

  // Use llvm_unreachable rather than assert(false): with assertions disabled
  // the assert vanishes and control would fall off the end of a non-void
  // function.
  llvm_unreachable("Only f16 and f128 BR_CC lowering is handled here!");
}
8269+
82398270
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
82408271
/// possible.
82418272
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -12493,6 +12524,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1249312524
case ISD::STORE: return LowerSTORE(Op, DAG);
1249412525
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
1249512526
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12527+
case ISD::BR_CC:
12528+
return LowerBR_CC(Op, DAG);
1249612529
case ISD::STRICT_FP_TO_UINT:
1249712530
case ISD::STRICT_FP_TO_SINT:
1249812531
case ISD::FP_TO_UINT:

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,7 @@ namespace llvm {
13021302
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
13031303
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
13041304
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1305+
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
13051306
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
13061307
const SDLoc &dl) const;
13071308
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/PowerPC/f128-branch-cond.ll

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,61 @@ final:
291291
ret i32 %result
292292
}
293293

294+
define i32 @test_choice5(fp128 %a) #0 {
295+
; LABEL: test_choice5
296+
; P8-LABEL: test_choice5:
297+
; P8: # %bb.0:
298+
; P8-NEXT: mflr 0
299+
; P8-NEXT: stdu 1, -32(1)
300+
; P8-NEXT: std 0, 48(1)
301+
; P8-NEXT: addis 3, 2, .LCPI4_0@toc@ha
302+
; P8-NEXT: addi 3, 3, .LCPI4_0@toc@l
303+
; P8-NEXT: lxvd2x 0, 0, 3
304+
; P8-NEXT: xxswapd 35, 0
305+
; P8-NEXT: bl __gtkf2
306+
; P8-NEXT: nop
307+
; P8-NEXT: # kill: def $r3 killed $r3 killed $x3
308+
; P8-NEXT: cmpwi 3, 0
309+
; P8-NEXT: bgt 0, .LBB4_2
310+
; P8-NEXT: b .LBB4_1
311+
; P8-NEXT: .LBB4_1: # %if.true
312+
; P8-NEXT: li 3, 1
313+
; P8-NEXT: addi 1, 1, 32
314+
; P8-NEXT: ld 0, 16(1)
315+
; P8-NEXT: mtlr 0
316+
; P8-NEXT: blr
317+
; P8-NEXT: .LBB4_2: # %if.false
318+
; P8-NEXT: li 3, 0
319+
; P8-NEXT: addi 1, 1, 32
320+
; P8-NEXT: ld 0, 16(1)
321+
; P8-NEXT: mtlr 0
322+
; P8-NEXT: blr
323+
;
324+
; P9-LABEL: test_choice5:
325+
; P9: # %bb.0:
326+
; P9-NEXT: addis 3, 2, .LCPI4_0@toc@ha
327+
; P9-NEXT: addi 3, 3, .LCPI4_0@toc@l
328+
; P9-NEXT: lxv 35, 0(3)
329+
; P9-NEXT: xscmpuqp 0, 2, 3
330+
; P9-NEXT: bgt 0, .LBB4_2
331+
; P9-NEXT: b .LBB4_1
332+
; P9-NEXT: .LBB4_1: # %if.true
333+
; P9-NEXT: li 3, 1
334+
; P9-NEXT: blr
335+
; P9-NEXT: .LBB4_2: # %if.false
336+
; P9-NEXT: li 3, 0
337+
; P9-NEXT: blr
338+
%cmp = fcmp ogt fp128 %a, 0xL00000000000000000000000000000000
339+
%not = icmp eq i1 %cmp, false
340+
br i1 %not, label %if.true, label %if.false
341+
342+
if.true:
343+
ret i32 1
344+
345+
if.false:
346+
ret i32 0
347+
}
348+
294349
attributes #0 = { nounwind }
295350

296351
declare i32 @foo()
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -O0 < %s | \
3+
; RUN: FileCheck %s -check-prefix=P8
4+
; RUN: llc -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -O0 < %s | \
5+
; RUN: FileCheck %s -check-prefix=P9
6+
7+
define i32 @test_choice1(half %a) #0 {
8+
; P8-LABEL: test_choice1:
9+
; P8: # %bb.0:
10+
; P8-NEXT: mflr 0
11+
; P8-NEXT: stdu 1, -32(1)
12+
; P8-NEXT: std 0, 48(1)
13+
; P8-NEXT: bl __truncsfhf2
14+
; P8-NEXT: nop
15+
; P8-NEXT: clrldi 3, 3, 48
16+
; P8-NEXT: bl __extendhfsf2
17+
; P8-NEXT: nop
18+
; P8-NEXT: xxlxor 0, 0, 0
19+
; P8-NEXT: fcmpu 0, 1, 0
20+
; P8-NEXT: bgt 0, .LBB0_2
21+
; P8-NEXT: b .LBB0_1
22+
; P8-NEXT: .LBB0_1: # %if.true
23+
; P8-NEXT: li 3, 1
24+
; P8-NEXT: addi 1, 1, 32
25+
; P8-NEXT: ld 0, 16(1)
26+
; P8-NEXT: mtlr 0
27+
; P8-NEXT: blr
28+
; P8-NEXT: .LBB0_2: # %if.false
29+
; P8-NEXT: li 3, 0
30+
; P8-NEXT: addi 1, 1, 32
31+
; P8-NEXT: ld 0, 16(1)
32+
; P8-NEXT: mtlr 0
33+
; P8-NEXT: blr
34+
;
35+
; P9-LABEL: test_choice1:
36+
; P9: # %bb.0:
37+
; P9-NEXT: xscvdphp 0, 1
38+
; P9-NEXT: mffprwz 3, 0
39+
; P9-NEXT: clrlwi 3, 3, 16
40+
; P9-NEXT: mtfprwz 0, 3
41+
; P9-NEXT: xscvhpdp 0, 0
42+
; P9-NEXT: xxlxor 1, 1, 1
43+
; P9-NEXT: fcmpu 0, 0, 1
44+
; P9-NEXT: bgt 0, .LBB0_2
45+
; P9-NEXT: b .LBB0_1
46+
; P9-NEXT: .LBB0_1: # %if.true
47+
; P9-NEXT: li 3, 1
48+
; P9-NEXT: blr
49+
; P9-NEXT: .LBB0_2: # %if.false
50+
; P9-NEXT: li 3, 0
51+
; P9-NEXT: blr
52+
%cmp = fcmp ogt half %a, 0.0
53+
%not = icmp eq i1 %cmp, false
54+
br i1 %not, label %if.true, label %if.false
55+
56+
if.true:
57+
ret i32 1
58+
59+
if.false:
60+
ret i32 0
61+
}
62+
63+
define i32 @test_choice2(half %a, half %b) #0 {
64+
; P8-LABEL: test_choice2:
65+
; P8: # %bb.0:
66+
; P8-NEXT: mflr 0
67+
; P8-NEXT: stdu 1, -64(1)
68+
; P8-NEXT: std 0, 80(1)
69+
; P8-NEXT: li 3, 52
70+
; P8-NEXT: stxsspx 2, 1, 3 # 4-byte Folded Spill
71+
; P8-NEXT: fmr 0, 1
72+
; P8-NEXT: lxsspx 1, 1, 3 # 4-byte Folded Reload
73+
; P8-NEXT: li 3, 56
74+
; P8-NEXT: stxsspx 0, 1, 3 # 4-byte Folded Spill
75+
; P8-NEXT: bl __truncsfhf2
76+
; P8-NEXT: nop
77+
; P8-NEXT: clrldi 3, 3, 48
78+
; P8-NEXT: bl __extendhfsf2
79+
; P8-NEXT: nop
80+
; P8-NEXT: fmr 0, 1
81+
; P8-NEXT: li 3, 56
82+
; P8-NEXT: lxsspx 1, 1, 3 # 4-byte Folded Reload
83+
; P8-NEXT: stfs 0, 60(1) # 4-byte Folded Spill
84+
; P8-NEXT: bl __truncsfhf2
85+
; P8-NEXT: nop
86+
; P8-NEXT: clrldi 3, 3, 48
87+
; P8-NEXT: bl __extendhfsf2
88+
; P8-NEXT: nop
89+
; P8-NEXT: lfs 0, 60(1) # 4-byte Folded Reload
90+
; P8-NEXT: fcmpu 0, 1, 0
91+
; P8-NEXT: bne 0, .LBB1_2
92+
; P8-NEXT: b .LBB1_1
93+
; P8-NEXT: .LBB1_1: # %if.true
94+
; P8-NEXT: li 3, 1
95+
; P8-NEXT: addi 1, 1, 64
96+
; P8-NEXT: ld 0, 16(1)
97+
; P8-NEXT: mtlr 0
98+
; P8-NEXT: blr
99+
; P8-NEXT: .LBB1_2: # %if.false
100+
; P8-NEXT: li 3, 0
101+
; P8-NEXT: addi 1, 1, 64
102+
; P8-NEXT: ld 0, 16(1)
103+
; P8-NEXT: mtlr 0
104+
; P8-NEXT: blr
105+
;
106+
; P9-LABEL: test_choice2:
107+
; P9: # %bb.0:
108+
; P9-NEXT: fmr 0, 1
109+
; P9-NEXT: xscvdphp 1, 2
110+
; P9-NEXT: mffprwz 3, 1
111+
; P9-NEXT: clrlwi 3, 3, 16
112+
; P9-NEXT: mtfprwz 1, 3
113+
; P9-NEXT: xscvhpdp 1, 1
114+
; P9-NEXT: xscvdphp 0, 0
115+
; P9-NEXT: mffprwz 3, 0
116+
; P9-NEXT: clrlwi 3, 3, 16
117+
; P9-NEXT: mtfprwz 0, 3
118+
; P9-NEXT: xscvhpdp 0, 0
119+
; P9-NEXT: fcmpu 0, 0, 1
120+
; P9-NEXT: bne 0, .LBB1_2
121+
; P9-NEXT: b .LBB1_1
122+
; P9-NEXT: .LBB1_1: # %if.true
123+
; P9-NEXT: li 3, 1
124+
; P9-NEXT: blr
125+
; P9-NEXT: .LBB1_2: # %if.false
126+
; P9-NEXT: li 3, 0
127+
; P9-NEXT: blr
128+
%cmp = fcmp une half %a, %b
129+
%not = xor i1 %cmp, true
130+
br i1 %not, label %if.true, label %if.false
131+
132+
if.true:
133+
ret i32 1
134+
135+
if.false:
136+
ret i32 0
137+
}
138+
139+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)