[RISCV] Fold SRLIW+SLLI+ADD into SRLIW+SHXADD #142611

Merged: 1 commit, Jun 4, 2025
56 changes: 33 additions & 23 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3116,34 +3116,44 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
else
Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
// followed by a SHXADD with c3 for the X amount.
if (isShiftedMask_64(Mask)) {
unsigned Leading = XLen - llvm::bit_width(Mask);
unsigned Trailing = llvm::countr_zero(Mask);
if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
SDLoc DL(N);
EVT VT = N.getValueType();
Val = SDValue(CurDAG->getMachineNode(
RISCV::SRLI, DL, VT, N0.getOperand(0),
CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
0);
return true;
}
if (Trailing != ShAmt)
return false;

unsigned Opcode;
// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
// leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
// followed by a SHXADD with c3 for the X amount.
if (LeftShift && Leading == 0 && C2 < Trailing)
Opcode = RISCV::SRLI;
// Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
// leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
// followed by a SHXADD with c3 for the X amount.
else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
Opcode = RISCV::SRLIW;
// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
// leading zeros and c3 trailing zeros. We can use an SRLI by C3
// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
// followed by a SHXADD using c3 for the X amount.
if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
SDLoc DL(N);
EVT VT = N.getValueType();
Val = SDValue(
CurDAG->getMachineNode(
RISCV::SRLI, DL, VT, N0.getOperand(0),
CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
0);
return true;
}
else if (!LeftShift && Leading == C2)
Opcode = RISCV::SRLI;
// Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
// leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
// followed by a SHXADD using c3 for the X amount.
else if (!LeftShift && Leading == 32 + C2)
Opcode = RISCV::SRLIW;
else
return false;

SDLoc DL(N);
EVT VT = N.getValueType();
ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
Val = SDValue(
CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
CurDAG->getTargetConstant(ShAmt, DL, VT)),
0);
return true;
}
} else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
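As a sanity check on the bit arithmetic behind the two new SRLIW cases, here is a minimal standalone C++ sketch. It is not part of the patch; the srliw helper is my own model of the instruction, and the constants are taken from the shl_and_gep and shr_and_gep tests below (for the latter, assuming the ×2 GEP scale has been folded into the shift/mask pair before matching).

```cpp
// Standalone sanity check for the two new SRLIW folds (not LLVM code).
// Hypothetical file check.cpp; build with: g++ -std=c++20 check.cpp
#include <cassert>
#include <cstdint>

// Model of RISC-V SRLIW for shamt >= 1: shift the low 32 bits right.
// Bit 31 of the 32-bit result is 0 whenever shamt >= 1, so the
// sign-extension SRLIW performs is effectively a zero-extension here.
static uint64_t srliw(uint64_t x, unsigned shamt) {
  return static_cast<uint32_t>(x) >> shamt;
}

int main() {
  const uint64_t inputs[] = {0, 0x1234, 0xDEADBEEFCAFEF00DULL, ~0ULL};

  // Left-shift form: (and (shl y, c2), c1), where c1 is a shifted mask
  // with 32-c2 leading zeros and c3 trailing zeros. Claimed rewrite:
  // (srliw y, c3-c2) << c3, with the << c3 absorbed by SHXADD.
  // Constants from shl_and_gep: c2 = 1, c3 = 3, c1 = 0x1FFFFFFF8.
  for (uint64_t y : inputs) {
    const unsigned c2 = 1, c3 = 3;
    const uint64_t c1 = 0x1FFFFFFF8ULL;
    assert(((y << c2) & c1) == srliw(y, c3 - c2) << c3); // srliw 2 + sh3add
  }

  // Right-shift form: (and (srl y, c2), c1), where c1 is a shifted mask
  // with 32+c2 leading zeros and c3 trailing zeros. Claimed rewrite:
  // (srliw y, c2+c3) << c3.
  // Constants from shr_and_gep with the i16 scale folded in:
  // c2 = 5, c3 = 1, c1 = 0x3FFFFFF << 1 = 0x7FFFFFE.
  for (uint64_t y : inputs) {
    const unsigned c2 = 5, c3 = 1;
    const uint64_t c1 = 0x7FFFFFEULL;
    assert(((y >> c2) & c1) == srliw(y, c2 + c3) << c3); // srliw 6 + sh1add
  }
  return 0;
}
```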
117 changes: 83 additions & 34 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -3688,15 +3688,14 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
; RV64ZBA-LABEL: test_gep_gep_dont_crash:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a2, a2, 6
; RV64ZBA-NEXT: add a1, a2, a1
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: test_gep_gep_dont_crash:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a2, a2, 6
; RV64XANDESPERF-NEXT: slli a2, a2, 3
; RV64XANDESPERF-NEXT: add a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
; RV64XANDESPERF-NEXT: ret
%lshr = lshr i64 %a2, 6
@@ -4276,52 +4275,104 @@ entry:
}

define ptr @shl_and_gep(ptr %p, i64 %i) {
; CHECK-LABEL: shl_and_gep:
; CHECK: # %bb.0:
; CHECK-NEXT: srliw a1, a1, 2
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: shl_and_gep:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a1, 2
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: shl_and_gep:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a1, a1, 2
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: shl_and_gep:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a1, a1, 2
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
; RV64XANDESPERF-NEXT: ret
%shl = shl i64 %i, 1
%and = and i64 %shl, 8589934584
%gep = getelementptr i8, ptr %p, i64 %and
ret ptr %gep
}
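Worked through the new code path: the mask 8589934584 is 0x1FFFFFFF8, a shifted mask with 31 = 32 - c2 leading zeros (c2 = 1 from the shl) and 3 trailing zeros. The trailing-zero count matches the sh3add shift amount, so the new left-shift SRLIW case fires: SRLIW by 3 - 1 = 2 feeds sh3add directly, and the separate slli+add pair disappears on Zba.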

define ptr @shr_and_gep(ptr %p, i64 %i) {
; CHECK-LABEL: shr_and_gep:
; CHECK: # %bb.0:
; CHECK-NEXT: srliw a1, a1, 6
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: shr_and_gep:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a1, 6
; RV64I-NEXT: slli a1, a1, 1
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: shr_and_gep:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a1, a1, 6
; RV64ZBA-NEXT: sh1add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: shr_and_gep:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a1, a1, 6
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
; RV64XANDESPERF-NEXT: ret
%lshr = lshr i64 %i, 6
%and = and i64 %lshr, 67108863
%gep = getelementptr i16, ptr %p, i64 %and
ret ptr %gep
}
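Here the and mask is 67108863 = 0x3FFFFFF, applied before the i16 GEP scale of 2. The srliw shift of 6 in the Zba output is consistent with the matcher seeing a canonicalized (and (srl y, 5), 0x7FFFFFE): that mask has 37 = 32 + c2 leading zeros and 1 trailing zero, so the new right-shift SRLIW case emits SRLIW by c2 + c3 = 6 feeding sh1add.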

define ptr @slt_select_gep(ptr %p, i32 %y) {
; CHECK-LABEL: slt_select_gep:
; CHECK: # %bb.0:
; CHECK-NEXT: srli a1, a1, 28
; CHECK-NEXT: andi a1, a1, 8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: ret
; RV64I-LABEL: slt_select_gep:
; RV64I: # %bb.0:
; RV64I-NEXT: srli a1, a1, 28
; RV64I-NEXT: andi a1, a1, 8
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: addi a0, a0, 16
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: slt_select_gep:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a1, a1, 31
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: addi a0, a0, 16
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: slt_select_gep:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a1, a1, 31
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
; RV64XANDESPERF-NEXT: addi a0, a0, 16
; RV64XANDESPERF-NEXT: ret
%cmp = icmp slt i32 %y, 0
%select = select i1 %cmp, i64 24, i64 16
%gep = getelementptr i8, ptr %p, i64 %select
ret ptr %gep
}
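As the RV64I output shows, the select of 24 versus 16 lowers to 16 + ((y >> 28) & 8), i.e. 8 times the sign bit of the i32 argument. The mask 8 has 60 = 32 + 28 leading zeros and 3 trailing zeros, so the new rule replaces the srli+andi pair with srliw a1, 31 (isolating the 32-bit sign bit) feeding sh3add.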

define i32 @shr_and_add(i32 %x, i32 %y) {
; CHECK-LABEL: shr_and_add:
; CHECK: # %bb.0:
; CHECK-NEXT: srliw a1, a1, 9
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: addw a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: shr_and_add:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a1, 9
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: shr_and_add:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a1, a1, 9
; RV64ZBA-NEXT: sh2add a0, a1, a0
; RV64ZBA-NEXT: sext.w a0, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: shr_and_add:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a1, a1, 9
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
; RV64XANDESPERF-NEXT: sext.w a0, a0
; RV64XANDESPERF-NEXT: ret
%lshr = lshr i32 %y, 7
%and = and i32 %lshr, 33554428
%add = add i32 %x, %and
@@ -4344,19 +4395,17 @@ define ptr @udiv1280_gep(ptr %p, i16 zeroext %i) {
; RV64ZBA-NEXT: lui a2, 13
; RV64ZBA-NEXT: addi a2, a2, -819
; RV64ZBA-NEXT: mul a1, a1, a2
; RV64ZBA-NEXT: srli a1, a1, 23
; RV64ZBA-NEXT: srliw a1, a1, 3
; RV64ZBA-NEXT: sh3add.uw a0, a1, a0
; RV64ZBA-NEXT: srliw a1, a1, 26
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: udiv1280_gep:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: lui a2, 13
; RV64XANDESPERF-NEXT: addi a2, a2, -819
; RV64XANDESPERF-NEXT: mul a1, a1, a2
; RV64XANDESPERF-NEXT: srli a1, a1, 23
; RV64XANDESPERF-NEXT: srliw a1, a1, 3
; RV64XANDESPERF-NEXT: nds.lea.d.ze a0, a0, a1
; RV64XANDESPERF-NEXT: srliw a1, a1, 26
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
; RV64XANDESPERF-NEXT: ret
%udiv = udiv i16 %i, 1280
%idx.ext = zext nneg i16 %udiv to i64
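The division by 1280 appears to be the usual magic-number lowering: multiply by 52429 (lui 13; addi -819), then shift right by 26. Because %i is a zero-extended i16, the product stays below 2^32, so the old srli by 23 plus srliw by 3 collapse into a single srliw by 26; and since SRLIW already zero-extends its result, the .uw/.ze addressing forms (sh3add.uw, nds.lea.d.ze) relax to plain sh3add and nds.lea.d.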