Skip to content

Commit b9f010f

Browse files
[LLVM][CodeGen][AArch64] Don't scalarise v8{f16,bf16} vsetcc operations.
This also removes the custom promotion code for the v4{f16,bf16} cases, because the same common promotion code now handles them.
1 parent 396e2ef commit b9f010f

File tree

7 files changed

+435
-2053
lines changed

7 files changed

+435
-2053
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+11-25
Original file line numberDiff line numberDiff line change
@@ -841,18 +841,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
841841
setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
842842
setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
843843
setOperationPromotedToType(ISD::FCANONICALIZE, V4Narrow, MVT::v4f32);
844+
setOperationPromotedToType(ISD::SETCC, V4Narrow, MVT::v4f32);
844845

845846
setOperationAction(ISD::FABS, V4Narrow, Legal);
846-
setOperationAction(ISD::FNEG, V4Narrow, Legal);
847+
setOperationAction(ISD::FNEG, V4Narrow, Legal);
847848
setOperationAction(ISD::FMA, V4Narrow, Expand);
848-
setOperationAction(ISD::SETCC, V4Narrow, Custom);
849849
setOperationAction(ISD::BR_CC, V4Narrow, Expand);
850850
setOperationAction(ISD::SELECT, V4Narrow, Expand);
851851
setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
852852
setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
853853
setOperationAction(ISD::FSQRT, V4Narrow, Expand);
854854

855855
auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
856+
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
857+
setOperationPromotedToType(ISD::SETCC, V8Narrow, MVT::v8f32);
858+
856859
setOperationAction(ISD::FABS, V8Narrow, Legal);
857860
setOperationAction(ISD::FADD, V8Narrow, Legal);
858861
setOperationAction(ISD::FCEIL, V8Narrow, Legal);
@@ -862,19 +865,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
862865
setOperationAction(ISD::FMA, V8Narrow, Expand);
863866
setOperationAction(ISD::FMUL, V8Narrow, Legal);
864867
setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal);
865-
setOperationAction(ISD::FNEG, V8Narrow, Legal);
868+
setOperationAction(ISD::FNEG, V8Narrow, Legal);
866869
setOperationAction(ISD::FROUND, V8Narrow, Legal);
867870
setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
868871
setOperationAction(ISD::FRINT, V8Narrow, Legal);
869872
setOperationAction(ISD::FSQRT, V8Narrow, Expand);
870873
setOperationAction(ISD::FSUB, V8Narrow, Legal);
871874
setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
872-
setOperationAction(ISD::SETCC, V8Narrow, Expand);
873875
setOperationAction(ISD::BR_CC, V8Narrow, Expand);
874876
setOperationAction(ISD::SELECT, V8Narrow, Expand);
875877
setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
876878
setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
877-
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
878879
};
879880

880881
if (!Subtarget->hasFullFP16()) {
@@ -15905,6 +15906,11 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1590515906
if (LHS.getValueType().getVectorElementType().isInteger())
1590615907
return Op;
1590715908

15909+
assert((!Subtarget->hasFullFP16() &&
15910+
LHS.getValueType().getVectorElementType() != MVT::f16) ||
15911+
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15912+
LHS.getValueType().getVectorElementType() != MVT::f128);
15913+
1590815914
// Lower isnan(x) | isnan(never-nan) to x != x.
1590915915
// Lower !isnan(x) & !isnan(never-nan) to x == x.
1591015916
if (CC == ISD::SETUO || CC == ISD::SETO) {
@@ -15923,26 +15929,6 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1592315929
}
1592415930
}
1592515931

15926-
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
15927-
15928-
// Make v4f16 (only) fcmp operations utilise vector instructions
15929-
// v8f16 support will be a litle more complicated
15930-
if ((!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) ||
15931-
LHS.getValueType().getVectorElementType() == MVT::bf16) {
15932-
if (LHS.getValueType().getVectorNumElements() == 4) {
15933-
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
15934-
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
15935-
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
15936-
DAG.ReplaceAllUsesWith(Op, NewSetcc);
15937-
CmpVT = MVT::v4i32;
15938-
} else
15939-
return SDValue();
15940-
}
15941-
15942-
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
15943-
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15944-
LHS.getValueType().getVectorElementType() != MVT::f128);
15945-
1594615932
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1594715933
// clean. Some of them require two branches to implement.
1594815934
AArch64CC::CondCode CC1, CC2;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -4236,9 +4236,11 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
42364236

42374237
if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
42384238
auto LT = getTypeLegalizationCost(ValTy);
4239-
// Cost v4f16 FCmp without FP16 support via converting to v4f32 and back.
4239+
// Cost v#f16 FCmp without FP16 support via converting to v#f32 and back.
42404240
if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
42414241
return LT.first * 4; // fcvtl + fcvtl + fcmp + xtn
4242+
if (LT.second == MVT::v8f16 && !ST->hasFullFP16())
4243+
return LT.first * 8; // 2*(fcvtl + fcvtl2 + fcmp) + uzp1 + xtn
42424244
}
42434245

42444246
// Treat the icmp in icmp(and, 0) as free, as we can make use of ands.

llvm/test/Analysis/CostModel/AArch64/cmp.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ define void @cmps() {
1616
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf16 = fcmp oge half undef, undef
1717
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
1818
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
19-
; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
19+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
2020
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
2121
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
2222
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void

llvm/test/Analysis/CostModel/AArch64/vector-select.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
119119

120120
define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
121121
; COST-NOFP16-LABEL: 'v8f16_select_ogt'
122-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
122+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
123123
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
124124
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
125125
;
@@ -184,7 +184,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
184184

185185
define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
186186
; COST-NOFP16-LABEL: 'v8f16_select_oge'
187-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
187+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
188188
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
189189
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
190190
;
@@ -249,7 +249,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
249249

250250
define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
251251
; COST-NOFP16-LABEL: 'v8f16_select_olt'
252-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
252+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
253253
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
254254
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
255255
;
@@ -314,7 +314,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
314314

315315
define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
316316
; COST-NOFP16-LABEL: 'v8f16_select_ole'
317-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
317+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
318318
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
319319
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
320320
;
@@ -379,7 +379,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
379379

380380
define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
381381
; COST-NOFP16-LABEL: 'v8f16_select_oeq'
382-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
382+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
383383
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
384384
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
385385
;
@@ -444,7 +444,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
444444

445445
define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
446446
; COST-NOFP16-LABEL: 'v8f16_select_one'
447-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
447+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
448448
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
449449
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
450450
;
@@ -513,7 +513,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
513513

514514
define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
515515
; COST-NOFP16-LABEL: 'v8f16_select_une'
516-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
516+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
517517
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
518518
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
519519
;

0 commit comments

Comments
 (0)