Skip to content

Commit b2f3863

Browse files
[Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering (#159355)
This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Co-authored-by: quic-santdas <quic_santdas@quicinc.com>
1 parent 8ed4899 commit b2f3863

File tree

5 files changed

+182
-1
lines changed

5 files changed

+182
-1
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3352,7 +3352,6 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
33523352
SDValue
33533353
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
33543354
unsigned Opc = Op.getOpcode();
3355-
33563355
// Handle INLINEASM first.
33573356
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
33583357
return LowerINLINEASM(Op, DAG);

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,8 @@ class HexagonTargetLowering : public TargetLowering {
577577
SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const;
578578
SDValue LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
579579
SDValue LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
580+
SDValue LowerHvxPred32ToFp(SDValue Op, SelectionDAG &DAG) const;
581+
SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const;
580582
SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
581583
SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
582584

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,10 @@ HexagonTargetLowering::initializeHVXLowering() {
446446
}
447447
}
448448

449+
// Include cases which are not handled earlier
450+
setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
451+
setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
452+
449453
setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
450454
}
451455

@@ -2333,6 +2337,123 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
23332337
return ExpandHvxFpToInt(Op, DAG);
23342338
}
23352339

2340+
// For vector type v32i1 uint_to_fp to v32f32:
2341+
// R1 = #1, R2 holds the v32i1 param
2342+
// V1 = vsplat(R1)
2343+
// V2 = vsplat(R2)
2344+
// Q0 = vand(V1,R1)
2345+
// V0.w=prefixsum(Q0)
2346+
// V0.w=vsub(V0.w,V1.w)
2347+
// V2.w = vlsr(V2.w,V0.w)
2348+
// V2 = vand(V2,V1)
2349+
// V2.sf = V2.w
2350+
// Lowers a uint_to_fp node whose source operand is a v32i1 predicate into a
// fixed chain of Hexagon machine nodes ending in V6_vconv_sf_w.
//
// PredOp is the conversion node itself (not the predicate): the predicate is
// read as PredOp.getOperand(0), and the node's own type is used as the result
// type of the final conversion.
// Returns result 0 of the final V6_vconv_sf_w machine node.
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
2351+
SelectionDAG &DAG) const {
2352+
2353+
// Type of the node being lowered; used below as the type of the final
// conversion. NOTE(review): ty() is defined elsewhere - presumably it returns
// the simple value type of the SDValue.
MVT ResTy = ty(PredOp);
2354+
const SDLoc &dl(PredOp);
2355+
2356+
// Materialize the scalar constant 1 and splat it across a v32i32 vector.
SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2357+
SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2358+
SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2359+
SDValue(RegConst, 0));
2360+
// Build a v32i1 predicate from the splatted 1 and the scalar 1; it is only
// used as the input of the prefix sum below.
SDNode *PredTransfer =
2361+
DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2362+
SDValue(SplatConst, 0), SDValue(RegConst, 0));
2363+
// Word-wise prefix sum over that predicate.
SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2364+
SDValue(PredTransfer, 0));
2365+
// Reinterpret the v32i1 source operand as an i32 bitmask and splat that
// scalar into every word lane.
SDNode *SplatParam = DAG.getMachineNode(
2366+
Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2367+
DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
2368+
// Subtract the splatted 1 from the prefix sum.
// NOTE(review): presumably this yields the per-lane index 0..31 - confirm
// against the HVX prefixsum semantics.
SDNode *Vsub =
2369+
DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2370+
SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2371+
// Shift each lane's copy of the bitmask right by that lane's Vsub value.
SDNode *IndexShift =
2372+
DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2373+
SDValue(SplatParam, 0), SDValue(Vsub, 0));
2374+
// AND with the splatted 1 so that only bit 0 of each lane survives.
SDNode *MaskOff =
2375+
DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2376+
SDValue(IndexShift, 0), SDValue(SplatConst, 0));
2377+
// Convert the masked words to the floating-point result type.
SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
2378+
SDValue(MaskOff, 0));
2379+
return SDValue(Convert, 0);
2380+
}
2381+
2382+
// For vector type v64i1 uint_to_fp to v64f16:
2383+
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2384+
// R3 = subreg_high (R32)
2385+
// R2 = subreg_low (R32)
2386+
// R1 = #1
2387+
// V1 = vsplat(R1)
2388+
// V2 = vsplat(R2)
2389+
// V3 = vsplat(R3)
2390+
// Q0 = vand(V1,R1)
2391+
// V0.w=prefixsum(Q0)
2392+
// V0.w=vsub(V0.w,V1.w)
2393+
// V2.w = vlsr(V2.w,V0.w)
2394+
// V3.w = vlsr(V3.w,V0.w)
2395+
// V2 = vand(V2,V1)
2396+
// V3 = vand(V3,V1)
2397+
// V2.h = vpacke(V3.w,V2.w)
2398+
// V2.hf = V2.h
2399+
// Lowers a uint_to_fp node whose source operand is a v64i1 predicate into a
// fixed chain of Hexagon machine nodes ending in V6_vconv_hf_h.
//
// PredOp is the conversion node itself (not the predicate): the predicate is
// read as PredOp.getOperand(0), bitcast to i64 and split into two i32 halves
// that are processed in parallel and packed together before the conversion.
// Returns result 0 of the final V6_vconv_hf_h machine node.
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
2400+
SelectionDAG &DAG) const {
2401+
2402+
// Type of the node being lowered; used below as the type of the final
// conversion. NOTE(review): ty() is defined elsewhere - presumably it returns
// the simple value type of the SDValue.
MVT ResTy = ty(PredOp);
2403+
const SDLoc &dl(PredOp);
2404+
2405+
// Reinterpret the v64i1 source operand as a single i64 bitmask.
SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
2406+
// Extract the isub_hi and isub_lo i32 subregisters of the i64 bitmask.
2407+
SDValue HiReg =
2408+
DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
2409+
SDValue LoReg =
2410+
DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
2411+
// Materialize the scalar constant 1 and splat it across a v32i32 vector (V1).
2412+
SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2413+
SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2414+
SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2415+
SDValue(RegConst, 0));
2416+
// Splat each i32 half of the bitmask into every word lane of its own vector.
2417+
SDNode *SplatHi =
2418+
DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2419+
DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
2420+
SDNode *SplatLo =
2421+
DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2422+
DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
2423+
// Build a v32i1 predicate from the splatted 1 and the scalar 1; it is only
// used as the input of the prefix sum below.
2424+
SDNode *PredTransfer =
2425+
DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2426+
SDValue(SplatConst, 0), SDValue(RegConst, 0));
2427+
// Word-wise prefix sum over that predicate.
2428+
SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2429+
SDValue(PredTransfer, 0));
2430+
// Subtract the splatted 1 from the prefix sum.
// NOTE(review): presumably this yields the per-lane index 0..31 - confirm
// against the HVX prefixsum semantics.
2431+
SDNode *Vsub =
2432+
DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2433+
SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2434+
// Shift each lane's copy of the hi and lo bitmask right by that lane's Vsub
// value; both halves share the same shift amounts.
2435+
SDNode *IndexShift_hi =
2436+
DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2437+
SDValue(SplatHi, 0), SDValue(Vsub, 0));
2438+
SDNode *IndexShift_lo =
2439+
DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2440+
SDValue(SplatLo, 0), SDValue(Vsub, 0));
2441+
// AND with the splatted 1 so that only bit 0 of each lane survives.
2442+
SDNode *MaskOff_hi =
2443+
DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2444+
SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
2445+
SDNode *MaskOff_lo =
2446+
DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2447+
SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
2448+
// Pack the two word vectors into one v64i16 vector (hi is the first operand,
// lo the second), then convert the halfwords to the floating-point result.
2449+
SDNode *Pack =
2450+
DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
2451+
SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
2452+
SDNode *Convert =
2453+
DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
2454+
return SDValue(Convert, 0);
2455+
}
2456+
23362457
SDValue
23372458
HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
23382459
// Catch invalid conversion ops (just in case).
@@ -2343,6 +2464,13 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
23432464
MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
23442465
MVT FpTy = ResTy.getVectorElementType();
23452466

2467+
if (Op.getOpcode() == ISD::UINT_TO_FP) {
2468+
if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2469+
return LowerHvxPred32ToFp(Op, DAG);
2470+
if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2471+
return LowerHvxPred64ToFp(Op, DAG);
2472+
}
2473+
23462474
if (Subtarget.useHVXIEEEFPOps()) {
23472475
// There are only conversions to f16.
23482476
if (FpTy == MVT::f16) {
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; Tests lowering of v32i1 to v32f32
2+
3+
; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b,+hvx-ieee-fp \
4+
; RUN: -stop-after=hexagon-isel %s -o - | FileCheck %s
5+
6+
; CHECK: [[R0:%[0-9]+]]:hvxvr = V6_lvsplatw killed %{{[0-9]+}}
7+
; CHECK-NEXT: [[R1:%[0-9]+]]:intregs = A2_tfrsi 1
8+
; CHECK-NEXT: [[R2:%[0-9]+]]:hvxvr = V6_lvsplatw [[R1]]
9+
; CHECK-NEXT: [[R3:%[0-9]+]]:hvxqr = V6_vandvrt [[R2]], [[R1]]
10+
; CHECK-NEXT: [[R4:%[0-9]+]]:hvxvr = V6_vprefixqw killed [[R3]]
11+
; CHECK-NEXT: [[R5:%[0-9]+]]:hvxvr = V6_vsubw killed [[R4]], [[R2]]
12+
; CHECK-NEXT: [[R6:%[0-9]+]]:hvxvr = V6_vlsrwv killed [[R0]], killed [[R5]]
13+
; CHECK-NEXT: [[R7:%[0-9]+]]:hvxvr = V6_vand killed [[R6]], [[R2]]
14+
; CHECK-NEXT: [[R8:%[0-9]+]]:hvxvr = V6_vconv_sf_w killed [[R7]]
15+
; CHECK-NEXT: hvxvr = V6_vadd_sf_sf [[R8]], [[R8]]
16+
17+
; Test driver: compares two <32 x i16> inputs to obtain a <32 x i1> value,
; converts it with uitofp to <32 x float>, and adds the result to itself.
define <32 x float> @uitofp_i1(<32 x i16> %in0, <32 x i16> %in1) #0
18+
{
19+
; Lane-wise equality compare; produces the <32 x i1> source of the conversion.
%q1 = icmp eq <32 x i16> %in0, %in1
20+
; The conversion under test.
%fp0 = uitofp <32 x i1> %q1 to <32 x float>
21+
; Use the converted value twice so it feeds a floating-point add.
%out = fadd <32 x float> %fp0, %fp0
22+
ret <32 x float> %out
23+
}
24+
25+
attributes #0 = { nounwind readnone "target-cpu"="hexagonv79" "target-features"="+hvxv79,+hvx-length128b" }
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; Tests the conversion pattern for v64i1 to v64f16
2+
; r0, r3 and r9 registers are i32 types converted from
3+
; v32i1 via a bitcasting sequence.
4+
5+
; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b \
6+
; RUN: %s -verify-machineinstrs -o - | FileCheck %s
7+
8+
; CHECK: [[V3:v[0-9]+]] = vsplat([[R0:r[0-9]+]])
9+
; CHECK: [[Q0:q[0-9]+]] = vand([[V3]],[[R0]])
10+
; CHECK: [[V4:v[0-9]+]].w = prefixsum([[Q0]])
11+
; CHECK: [[V5:v[0-9]+]].w = vsub([[V4]].w,[[V3]].w)
12+
; CHECK: [[V1:v[0-9]+]] = vsplat(r
13+
; CHECK: [[V2:v[0-9]+]] = vsplat(r
14+
; CHECK: [[V6:v[0-9]+]].w = vlsr([[V1]].w,[[V5]].w)
15+
; CHECK: [[V7:v[0-9]+]].w = vlsr([[V2]].w,[[V5]].w)
16+
; CHECK: [[V8:v[0-9]+]] = vand([[V6]],[[V3]])
17+
; CHECK: [[V9:v[0-9]+]] = vand([[V7]],[[V3]])
18+
; CHECK: [[V10:v[0-9]+]].h = vpacke([[V9]].w,[[V8]].w)
19+
; CHECK: .hf = [[V10]].h
20+
21+
; Test driver: compares two <64 x i16> inputs to obtain a <64 x i1> value,
; converts it with uitofp to <64 x half>, and adds the result to itself.
define <64 x half> @uitofp_i1(<64 x i16> %in0, <64 x i16> %in1)
22+
{
23+
; Lane-wise equality compare; produces the <64 x i1> source of the conversion.
%in = icmp eq <64 x i16> %in0, %in1
24+
; The conversion under test.
%fp0 = uitofp <64 x i1> %in to <64 x half>
25+
; Use the converted value twice so it feeds a floating-point add.
%out = fadd <64 x half> %fp0, %fp0
26+
ret <64 x half> %out
27+
}

0 commit comments

Comments
 (0)