Commit afa517a

[RISCV] Fold SRLIW+SLLI+ADD into SRLIW+SHXADD (#142611)
This handles RV64's SRLIW similarly to SRLI. It also fixes the comments for the SRLI combines, which did not match the code.
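As a concrete illustration (taken from the shl_and_gep test in rv64zba.ll updated below; register assignments as in that test), a GEP whose index is (and (shl %i, 1), 8589934584) previously lowered on RV64 with Zba to

    srliw   a1, a1, 2
    slli    a1, a1, 3
    add     a0, a0, a1

and now folds the SLLI+ADD pair into SH3ADD:

    srliw   a1, a1, 2
    sh3add  a0, a1, a0    # a0 = a0 + (a1 << 3)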
1 parent: 07a5341

2 files changed: +116 -57 lines changed


llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Lines changed: 33 additions & 23 deletions

@@ -3116,34 +3116,44 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
       else
         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

-      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
-      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
-      // followed by a SHXADD with c3 for the X amount.
       if (isShiftedMask_64(Mask)) {
         unsigned Leading = XLen - llvm::bit_width(Mask);
         unsigned Trailing = llvm::countr_zero(Mask);
-        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
-          SDLoc DL(N);
-          EVT VT = N.getValueType();
-          Val = SDValue(CurDAG->getMachineNode(
-                            RISCV::SRLI, DL, VT, N0.getOperand(0),
-                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
-                        0);
-          return true;
-        }
+        if (Trailing != ShAmt)
+          return false;
+
+        unsigned Opcode;
+        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+        // followed by a SHXADD with c3 for the X amount.
+        if (LeftShift && Leading == 0 && C2 < Trailing)
+          Opcode = RISCV::SRLI;
+        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
+        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
+        // followed by a SHXADD with c3 for the X amount.
+        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
+          Opcode = RISCV::SRLIW;
         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
-        // leading zeros and c3 trailing zeros. We can use an SRLI by C3
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
         // followed by a SHXADD using c3 for the X amount.
-        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
-          SDLoc DL(N);
-          EVT VT = N.getValueType();
-          Val = SDValue(
-              CurDAG->getMachineNode(
-                  RISCV::SRLI, DL, VT, N0.getOperand(0),
-                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
-              0);
-          return true;
-        }
+        else if (!LeftShift && Leading == C2)
+          Opcode = RISCV::SRLI;
+        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
+        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
+        // followed by a SHXADD using c3 for the X amount.
+        else if (!LeftShift && Leading == 32 + C2)
+          Opcode = RISCV::SRLIW;
+        else
+          return false;
+
+        SDLoc DL(N);
+        EVT VT = N.getValueType();
+        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
+        Val = SDValue(
+            CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
+                                   CurDAG->getTargetConstant(ShAmt, DL, VT)),
+            0);
+        return true;
       }
     } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
                isa<ConstantSDNode>(N0.getOperand(1))) {
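To see how the new SRLIW case fires, here is a rough walk-through of the shl_and_gep test from the file below; the intermediate values are my own reading of the selection code above, not compiler output:

    # shl_and_gep: index = (and (shl %i, 1), 8589934584), matched for SH3ADD (ShAmt = c3 = 3)
    #   c1 = 8589934584 = 0x1FFFFFFF8  ->  Leading = 31 = 32 - c2, Trailing = 3
    #   c2 = 1, so c2 < Trailing and Trailing == ShAmt  ->  Opcode = SRLIW, shift = c3 - c2 = 2
    srliw   a1, a1, 2
    sh3add  a0, a1, a0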

llvm/test/CodeGen/RISCV/rv64zba.ll
Lines changed: 83 additions & 34 deletions

@@ -3688,15 +3688,14 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
 ; RV64ZBA-LABEL: test_gep_gep_dont_crash:
 ; RV64ZBA:       # %bb.0:
 ; RV64ZBA-NEXT:    srliw a2, a2, 6
-; RV64ZBA-NEXT:    add a1, a2, a1
+; RV64ZBA-NEXT:    sh3add a0, a2, a0
 ; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ret
 ;
 ; RV64XANDESPERF-LABEL: test_gep_gep_dont_crash:
 ; RV64XANDESPERF:       # %bb.0:
 ; RV64XANDESPERF-NEXT:    srliw a2, a2, 6
-; RV64XANDESPERF-NEXT:    slli a2, a2, 3
-; RV64XANDESPERF-NEXT:    add a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a2
 ; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
 ; RV64XANDESPERF-NEXT:    ret
   %lshr = lshr i64 %a2, 6

@@ -4276,52 +4275,104 @@ entry:
 }

 define ptr @shl_and_gep(ptr %p, i64 %i) {
-; CHECK-LABEL: shl_and_gep:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srliw a1, a1, 2
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: shl_and_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_and_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shl_and_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 2
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT:    ret
   %shl = shl i64 %i, 1
   %and = and i64 %shl, 8589934584
   %gep = getelementptr i8, ptr %p, i64 %and
   ret ptr %gep
 }

 define ptr @shr_and_gep(ptr %p, i64 %i) {
-; CHECK-LABEL: shr_and_gep:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srliw a1, a1, 6
-; CHECK-NEXT:    slli a1, a1, 1
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: shr_and_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 6
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shr_and_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 6
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shr_and_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 6
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT:    ret
   %lshr = lshr i64 %i, 6
   %and = and i64 %lshr, 67108863
   %gep = getelementptr i16, ptr %p, i64 %and
   ret ptr %gep
 }

 define ptr @slt_select_gep(ptr %p, i32 %y) {
-; CHECK-LABEL: slt_select_gep:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srli a1, a1, 28
-; CHECK-NEXT:    andi a1, a1, 8
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    ret
+; RV64I-LABEL: slt_select_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 28
+; RV64I-NEXT:    andi a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    addi a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slt_select_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 31
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    addi a0, a0, 16
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: slt_select_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 31
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT:    addi a0, a0, 16
+; RV64XANDESPERF-NEXT:    ret
   %cmp = icmp slt i32 %y, 0
   %select = select i1 %cmp, i64 24, i64 16
   %gep = getelementptr i8, ptr %p, i64 %select
   ret ptr %gep
 }

 define i32 @shr_and_add(i32 %x, i32 %y) {
-; CHECK-LABEL: shr_and_add:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    srliw a1, a1, 9
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    addw a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: shr_and_add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 9
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shr_and_add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 9
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    sext.w a0, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shr_and_add:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 9
+; RV64XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT:    sext.w a0, a0
+; RV64XANDESPERF-NEXT:    ret
   %lshr = lshr i32 %y, 7
   %and = and i32 %lshr, 33554428
   %add = add i32 %x, %and

@@ -4344,19 +4395,17 @@ define ptr @udiv1280_gep(ptr %p, i16 zeroext %i) {
 ; RV64ZBA-NEXT:    lui a2, 13
 ; RV64ZBA-NEXT:    addi a2, a2, -819
 ; RV64ZBA-NEXT:    mul a1, a1, a2
-; RV64ZBA-NEXT:    srli a1, a1, 23
-; RV64ZBA-NEXT:    srliw a1, a1, 3
-; RV64ZBA-NEXT:    sh3add.uw a0, a1, a0
+; RV64ZBA-NEXT:    srliw a1, a1, 26
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ret
 ;
 ; RV64XANDESPERF-LABEL: udiv1280_gep:
 ; RV64XANDESPERF:       # %bb.0:
 ; RV64XANDESPERF-NEXT:    lui a2, 13
 ; RV64XANDESPERF-NEXT:    addi a2, a2, -819
 ; RV64XANDESPERF-NEXT:    mul a1, a1, a2
-; RV64XANDESPERF-NEXT:    srli a1, a1, 23
-; RV64XANDESPERF-NEXT:    srliw a1, a1, 3
-; RV64XANDESPERF-NEXT:    nds.lea.d.ze a0, a0, a1
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 26
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
 ; RV64XANDESPERF-NEXT:    ret
   %udiv = udiv i16 %i, 1280
   %idx.ext = zext nneg i16 %udiv to i64
