Skip to content

Commit 2ce586b

Browse files
Krzysztof Parzyszek and quic-sanirudh committed
[Hexagon] Handle floating point splats
Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com>
1 parent 33fc675 commit 2ce586b

File tree

8 files changed

+205
-1
lines changed

8 files changed

+205
-1
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ class HexagonTargetLowering : public TargetLowering {
458458
SelectionDAG &DAG) const;
459459

460460
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
461+
SDValue LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) const;
461462
SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
462463
SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
463464
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ HexagonTargetLowering::initializeHVXLowering() {
9696
// BUILD_VECTOR with f16 operands cannot be promoted without
9797
// promoting the result, so lower the node to vsplat or constant pool
9898
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
99+
setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
100+
setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal);
101+
setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
99102

100103
// Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
101104
// independent) handling of it would convert it to a load, which is
@@ -1299,6 +1302,24 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
12991302
return buildHvxVectorReg(Ops, dl, VecTy, DAG);
13001303
}
13011304

1305+
// Custom lowering for ISD::SPLAT_VECTOR on HVX (reached through the
// SPLAT_VECTOR case in LowerHvxOperation).
//
// Only a splat whose scalar operand has type f16 is rewritten here. The f16
// value is reinterpreted as i16, widened to i32, splatted as a vector of i16
// with the same number of elements as the result, and the integer vector is
// bitcast back to the original (floating-point) vector type.
//
// Op  - the SPLAT_VECTOR node; operand 0 is the scalar being splatted.
// DAG - the SelectionDAG used to create the replacement nodes.
//
// Returns the replacement value for an f16 splat, or an empty SDValue for
// every other operand type.
SDValue
1306+
HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1307+
const {
1308+
const SDLoc &dl(Op);
1309+
MVT VecTy = ty(Op);
1310+
MVT ArgTy = ty(Op.getOperand(0));
1311+
1312+
if (ArgTy == MVT::f16) {
1313+
// Integer vector type with the same element count as the result.
MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1314+
// Reinterpret the half-precision bits as a 16-bit integer (no conversion).
SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1315+
// Widen to i32 before splatting.
// NOTE(review): presumably needed because the integer splat patterns take an
// i32 scalar operand rather than i16 -- confirm.
SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1316+
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1317+
// Same bits, viewed again as the requested f16 vector type.
return DAG.getBitcast(VecTy, Splat);
1318+
}
1319+
1320+
// Not an f16 splat: nothing is rewritten here.
// NOTE(review): an empty SDValue presumably makes the caller fall back to the
// default handling of the node -- confirm.
return SDValue();
1321+
}
1322+
13021323
SDValue
13031324
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
13041325
const {
@@ -2185,6 +2206,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
21852206
default:
21862207
break;
21872208
case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
2209+
case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
21882210
case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
21892211
case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
21902212
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>;
257257

258258
def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
259259
def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
260+
// Matches an f32 floating-point immediate that is exactly the zero produced by
// APFloat::getZero(IEEEsingle, false).
// NOTE(review): the `false` argument is presumably the "negative" flag, so
// only +0.0 (all bits clear) matches and -0.0 does not -- confirm against the
// APFloat API.
def f32zero: PatLeaf<(f32 fpimm:$F), [{
261+
return N->isExactlyValue(APFloat::getZero(APFloat::IEEEsingle(), false));
262+
}]>;
260263

261264
// This complex pattern is really only to detect various forms of
262265
// sign-extension i32->i64. The selected value will be of type i64

llvm/lib/Target/Hexagon/HexagonPatternsHVX.td

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
6464
def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
6565
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
6666

67-
def vzero: PatFrag<(ops), (splat_vector (i32 0))>;
67+
def vzero: PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
6868
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
6969
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
7070
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
@@ -265,10 +265,13 @@ let Predicates = [UseHVX] in {
265265
// These should be preferred over a vsplat of 0.
266266
def: Pat<(VecI8 vzero), (V6_vd0)>;
267267
def: Pat<(VecI16 vzero), (V6_vd0)>;
268+
def: Pat<(VecF16 vzero), (V6_vd0)>;
268269
def: Pat<(VecI32 vzero), (V6_vd0)>;
270+
def: Pat<(VecF32 vzero), (V6_vd0)>;
269271
def: Pat<(VecPI8 vzero), (PS_vdd0)>;
270272
def: Pat<(VecPI16 vzero), (PS_vdd0)>;
271273
def: Pat<(VecPI32 vzero), (PS_vdd0)>;
274+
def: Pat<(VecPF32 vzero), (PS_vdd0)>;
272275

273276
def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
274277
def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
@@ -363,6 +366,18 @@ let Predicates = [UseHVX,UseHVXV62] in {
363366
def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
364367
}
365368
}
369+
// splat_vector selection patterns for the floating-point HVX vector types
// (VecF16, VecF32). They are active only when both UseHVXV68 and
// UseHVXFloatingPoint hold.
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
370+
// Immediate operands: splat a constant. These carry a larger
// AddedComplexity (30) than the register forms below (20).
// NOTE(review): presumably so the immediate forms win when both could
// match -- confirm.
let AddedComplexity = 30 in {
371+
def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
372+
def: Pat<(VecF32 (splat_vector anyint:$V)), (V62splatiw imm:$V)>;
373+
// An f32 immediate goes through ftoi before being splatted as a word.
def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (V62splatiw (ftoi $V))>;
374+
}
375+
// Register operands: splat the value held in a scalar register. VecF32
// accepts the scalar either as I32 or as F32.
let AddedComplexity = 20 in {
376+
def: Pat<(VecF16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
377+
def: Pat<(VecF32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
378+
def: Pat<(VecF32 (splat_vector F32:$Rs)), (V62splatrw $Rs)>;
379+
}
380+
}
366381

367382
class Vneg1<ValueType VecTy>
368383
: PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
2+
3+
; Check that the vsplat instruction is generated
4+
; CHECK: r[[V:[0-9]+]] = ##1092616192
5+
; CHECK: vsplat(r[[V]])
6+
7+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
8+
target triple = "hexagon"
9+
; Function Attrs: nofree norecurse nounwind writeonly
10+
define dso_local i32 @foo(float* nocapture %0, i32 %1) local_unnamed_addr #0 {
11+
%3 = icmp sgt i32 %1, 0
12+
br i1 %3, label %4, label %22
13+
14+
4: ; preds = %2
15+
%5 = icmp ult i32 %1, 64
16+
br i1 %5, label %6, label %9
17+
18+
6: ; preds = %20, %4
19+
%7 = phi float* [ %0, %4 ], [ %11, %20 ]
20+
%8 = phi i32 [ 0, %4 ], [ %10, %20 ]
21+
br label %23
22+
23+
9: ; preds = %4
24+
%10 = and i32 %1, -64
25+
%11 = getelementptr float, float* %0, i32 %10
26+
br label %12
27+
28+
12: ; preds = %12, %9
29+
%13 = phi i32 [ 0, %9 ], [ %18, %12 ]
30+
%14 = getelementptr float, float* %0, i32 %13
31+
%15 = bitcast float* %14 to <32 x float>*
32+
store <32 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, <32 x float>* %15, align 4
33+
%16 = getelementptr float, float* %14, i32 32
34+
%17 = bitcast float* %16 to <32 x float>*
35+
store <32 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, <32 x float>* %17, align 4
36+
%18 = add i32 %13, 64
37+
%19 = icmp eq i32 %18, %10
38+
br i1 %19, label %20, label %12
39+
40+
20: ; preds = %12
41+
%21 = icmp eq i32 %10, %1
42+
br i1 %21, label %22, label %6
43+
44+
22: ; preds = %23, %20, %2
45+
ret i32 0
46+
47+
23: ; preds = %23, %6
48+
%24 = phi float* [ %28, %23 ], [ %7, %6 ]
49+
%25 = phi i32 [ %26, %23 ], [ %8, %6 ]
50+
store float 1.000000e+01, float* %24, align 4
51+
%26 = add nuw nsw i32 %25, 1
52+
%27 = icmp eq i32 %26, %1
53+
%28 = getelementptr float, float* %24, i32 1
54+
br i1 %27, label %22, label %23
55+
}
56+
57+
attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
2+
3+
; Check that the non-splat f16 vector constant is emitted as .word data
4+
; CHECK: .word 1097875824
5+
; CHECK: .word 1048133241
6+
; CHECK: .word 0
7+
8+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
9+
target triple = "hexagon"
10+
; Function Attrs: nofree norecurse nounwind writeonly
11+
define dso_local i32 @foo(half* nocapture %a) local_unnamed_addr #0 {
12+
vector.body:
13+
%0 = bitcast half* %a to <40 x half>*
14+
store <40 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79, half 0xH3E79>, <40 x half>* %0, align 2
15+
ret i32 0
16+
}
17+
18+
attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; RUN: llc -mtriple=hexagon < %s | FileCheck %s
2+
3+
; Check that the vsplat instruction is generated
4+
; CHECK: r[[V:[0-9]+]] = #16752
5+
; CHECK: vsplat(r[[V]])
6+
7+
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
8+
target triple = "hexagon"
9+
; Function Attrs: nofree norecurse nounwind writeonly
10+
define dso_local i32 @foo(half* nocapture %0, i32 %1) local_unnamed_addr #0 {
11+
%3 = icmp sgt i32 %1, 0
12+
br i1 %3, label %4, label %22
13+
14+
4: ; preds = %2
15+
%5 = icmp ult i32 %1, 128
16+
br i1 %5, label %6, label %9
17+
18+
6: ; preds = %20, %4
19+
%7 = phi half* [ %0, %4 ], [ %11, %20 ]
20+
%8 = phi i32 [ 0, %4 ], [ %10, %20 ]
21+
br label %23
22+
23+
9: ; preds = %4
24+
%10 = and i32 %1, -128
25+
%11 = getelementptr half, half* %0, i32 %10
26+
br label %12
27+
28+
12: ; preds = %12, %9
29+
%13 = phi i32 [ 0, %9 ], [ %18, %12 ]
30+
%14 = getelementptr half, half* %0, i32 %13
31+
%15 = bitcast half* %14 to <64 x half>*
32+
store <64 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170>, <64 x half>* %15, align 2
33+
%16 = getelementptr half, half* %14, i32 64
34+
%17 = bitcast half* %16 to <64 x half>*
35+
store <64 x half> <half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170, half 0xH4170>, <64 x half>* %17, align 2
36+
%18 = add i32 %13, 128
37+
%19 = icmp eq i32 %18, %10
38+
br i1 %19, label %20, label %12
39+
40+
20: ; preds = %12
41+
%21 = icmp eq i32 %10, %1
42+
br i1 %21, label %22, label %6
43+
44+
22: ; preds = %23, %20, %2
45+
ret i32 0
46+
47+
23: ; preds = %23, %6
48+
%24 = phi half* [ %28, %23 ], [ %7, %6 ]
49+
%25 = phi i32 [ %26, %23 ], [ %8, %6 ]
50+
store half 0xH4170, half* %24, align 2
51+
%26 = add nuw nsw i32 %25, 1
52+
%27 = icmp eq i32 %26, %1
53+
%28 = getelementptr half, half* %24, i32 1
54+
br i1 %27, label %22, label %23
55+
}
56+
57+
attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }

llvm/test/CodeGen/Hexagon/autohvx/splat.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,5 +397,36 @@ define <64 x i32> @f23(i32 %a0) #1 {
397397
ret <64 x i32> %v1
398398
}
399399

400+
; Splat register, 16 bit fp, v68+
401+
define <64 x half> @f24(i16 %a0) #2 {
402+
; CHECK-LABEL: f24:
403+
; CHECK: // %bb.0:
404+
; CHECK-NEXT: {
405+
; CHECK-NEXT: v0.h = vsplat(r1)
406+
; CHECK-NEXT: jumpr r31
407+
; CHECK-NEXT: vmem(r0+#0) = v0.new
408+
; CHECK-NEXT: }
409+
%v0 = bitcast i16 %a0 to half
410+
%v1 = insertelement <64 x half> undef, half %v0, i32 0
411+
%v2 = shufflevector <64 x half> %v1, <64 x half> undef, <64 x i32> zeroinitializer
412+
ret <64 x half> %v2
413+
}
414+
415+
; Splat register, 32 bit fp, v68+
416+
define <32 x float> @f25(float %a0) #2 {
417+
; CHECK-LABEL: f25:
418+
; CHECK: // %bb.0:
419+
; CHECK-NEXT: {
420+
; CHECK-NEXT: v0 = vsplat(r1)
421+
; CHECK-NEXT: jumpr r31
422+
; CHECK-NEXT: vmem(r0+#0) = v0.new
423+
; CHECK-NEXT: }
424+
%v0 = insertelement <32 x float> undef, float %a0, i32 0
425+
%v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer
426+
ret <32 x float> %v1
427+
}
428+
429+
400430
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
401431
attributes #1 = { nounwind readnone "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }
432+
attributes #2 = { nounwind readnone "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }

0 commit comments

Comments
 (0)