[RISCV] Fold SRLIW+SLLI+ADD into SRLIW+SHXADD #142611
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)

Changes

This handles RV64 SRLIW similarly to SRLI. It also fixes the comments for the SRLI combines, which did not match the code.

Full diff: https://github.com/llvm/llvm-project/pull/142611.diff

2 Files Affected:
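As a concrete example (taken from the shl_and_gep test added below): for an i8 getelementptr whose index is (shl i64 %i, 1) masked with 8589934584, the base RV64I lowering scales the zero-extended value with a separate shift and add,

    srliw a1, a1, 2
    slli a1, a1, 3
    add a0, a0, a1

while with Zba the SLLI+ADD pair now folds into a single SH3ADD:

    srliw a1, a1, 2
    sh3add a0, a1, a0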
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4f6aa41d1e03b..4dd53fdd0213d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3116,34 +3116,44 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
else
Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
- // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
- // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
- // followed by a SHXADD with c3 for the X amount.
if (isShiftedMask_64(Mask)) {
unsigned Leading = XLen - llvm::bit_width(Mask);
unsigned Trailing = llvm::countr_zero(Mask);
- if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
- SDLoc DL(N);
- EVT VT = N.getValueType();
- Val = SDValue(CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, N0.getOperand(0),
- CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
- 0);
- return true;
- }
+ if (Trailing != ShAmt)
+ return false;
+
+ unsigned Opcode;
+ // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+ // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+ // followed by a SHXADD with c3 for the X amount.
+ if (LeftShift && Leading == 0 && C2 < Trailing)
+ Opcode = RISCV::SRLI;
+ // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
+ // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
+ // followed by a SHXADD with c3 for the X amount.
+ else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
+ Opcode = RISCV::SRLIW;
// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
- // leading zeros and c3 trailing zeros. We can use an SRLI by C3
+ // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
// followed by a SHXADD using c3 for the X amount.
- if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
- SDLoc DL(N);
- EVT VT = N.getValueType();
- Val = SDValue(
- CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, N0.getOperand(0),
- CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
- 0);
- return true;
- }
+ else if (!LeftShift && Leading == C2)
+ Opcode = RISCV::SRLI;
+ // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
+ // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
+ // followed by a SHXADD using c3 for the X amount.
+ else if (!LeftShift && Leading == 32 + C2)
+ Opcode = RISCV::SRLIW;
+ else
+ return false;
+
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
+ Val = SDValue(
+ CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(ShAmt, DL, VT)),
+ 0);
+ return true;
}
} else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a4d3b80edbd58..9859b9c555dbd 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -3688,15 +3688,14 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
; RV64ZBA-LABEL: test_gep_gep_dont_crash:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: srliw a2, a2, 6
-; RV64ZBA-NEXT: add a1, a2, a1
+; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh3add a0, a1, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: test_gep_gep_dont_crash:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: srliw a2, a2, 6
-; RV64XANDESPERF-NEXT: slli a2, a2, 3
-; RV64XANDESPERF-NEXT: add a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
; RV64XANDESPERF-NEXT: ret
%lshr = lshr i64 %a2, 6
@@ -4274,3 +4273,142 @@ entry:
%and = and i64 %add, 4294967295
ret i64 %and
}
+
+define ptr @shl_and_gep(ptr %p, i64 %i) {
+; RV64I-LABEL: shl_and_gep:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 2
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shl_and_gep:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srliw a1, a1, 2
+; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shl_and_gep:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 2
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
+ %shl = shl i64 %i, 1
+ %and = and i64 %shl, 8589934584
+ %gep = getelementptr i8, ptr %p, i64 %and
+ ret ptr %gep
+}
+
+define ptr @shr_and_gep(ptr %p, i64 %i) {
+; RV64I-LABEL: shr_and_gep:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 6
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shr_and_gep:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srliw a1, a1, 6
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shr_and_gep:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 6
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
+ %lshr = lshr i64 %i, 6
+ %and = and i64 %lshr, 67108863
+ %gep = getelementptr i16, ptr %p, i64 %and
+ ret ptr %gep
+}
+
+define ptr @slt_select_gep(ptr %p, i32 %y) {
+; RV64I-LABEL: slt_select_gep:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a1, 28
+; RV64I-NEXT: andi a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: addi a0, a0, 16
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: slt_select_gep:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srliw a1, a1, 31
+; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: addi a0, a0, 16
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: slt_select_gep:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 31
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: addi a0, a0, 16
+; RV64XANDESPERF-NEXT: ret
+ %cmp = icmp slt i32 %y, 0
+ %select = select i1 %cmp, i64 24, i64 16
+ %gep = getelementptr i8, ptr %p, i64 %select
+ ret ptr %gep
+}
+
+define i32 @shr_and_add(i32 %x, i32 %y) {
+; RV64I-LABEL: shr_and_add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 9
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: shr_and_add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srliw a1, a1, 9
+; RV64ZBA-NEXT: sh2add a0, a1, a0
+; RV64ZBA-NEXT: sext.w a0, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: shr_and_add:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: srliw a1, a1, 9
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT: sext.w a0, a0
+; RV64XANDESPERF-NEXT: ret
+ %lshr = lshr i32 %y, 7
+ %and = and i32 %lshr, 33554428
+ %add = add i32 %x, %and
+ ret i32 %add
+}
+
+define ptr @udiv1280_gep(ptr %p, i16 zeroext %i) {
+; RV64I-LABEL: udiv1280_gep:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 13
+; RV64I-NEXT: addi a2, a2, -819
+; RV64I-NEXT: mul a1, a1, a2
+; RV64I-NEXT: srliw a1, a1, 26
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: udiv1280_gep:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: lui a2, 13
+; RV64ZBA-NEXT: addi a2, a2, -819
+; RV64ZBA-NEXT: mul a1, a1, a2
+; RV64ZBA-NEXT: srliw a1, a1, 26
+; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: udiv1280_gep:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: lui a2, 13
+; RV64XANDESPERF-NEXT: addi a2, a2, -819
+; RV64XANDESPERF-NEXT: mul a1, a1, a2
+; RV64XANDESPERF-NEXT: srliw a1, a1, 26
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
+ %udiv = udiv i16 %i, 1280
+ %idx.ext = zext nneg i16 %udiv to i64
+ %add.ptr = getelementptr i64, ptr %p, i64 %idx.ext
+ ret ptr %add.ptr
+}
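For readers who want to sanity-check the new left-shift SRLIW case, here is a minimal standalone C++20 sketch of the mask arithmetic. This is not LLVM code: it hard-codes the constants from the shl_and_gep test above and uses std::bit_width/std::countr_zero in place of llvm::bit_width/llvm::countr_zero, and it skips the isShiftedMask_64 pre-check and the Mask adjustment that the real selectSHXADDOp performs.

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Constants from the shl_and_gep test: (and (shl y, 1), 0x1FFFFFFF8),
  // selected as the index operand of SH3ADD (ShAmt = 3).
  const unsigned XLen = 64, C2 = 1, ShAmt = 3;
  const uint64_t Mask = 0x1FFFFFFF8; // 8589934584, a shifted mask

  const unsigned Leading = XLen - std::bit_width(Mask); // 31
  const unsigned Trailing = std::countr_zero(Mask);     // 3

  // SHXADD performs the final left shift, so the mask's trailing zero
  // count must equal the SHXADD shift amount.
  assert(Trailing == ShAmt);

  // The new case: 32 - C2 leading zeros means the mask's top set bit is
  // bit 31 + C2, i.e. bit 31 of y before the shift. SRLIW zero-extends
  // from bit 31, so shifting y right by Trailing - C2 with SRLIW yields
  // exactly the masked, pre-scaled index in one instruction.
  assert(Leading == 32 - C2 && C2 < Trailing);
  const unsigned SrliwAmt = Trailing - C2;
  assert(SrliwAmt == 2); // matches "srliw a1, a1, 2" in the test output
  return 0;
}

Note why SRLIW rather than plain SRLI fits here: SRLI by Trailing - C2 would keep bits 32 and above of y, which the mask discards, whereas SRLIW zero-extends from bit 31 and clears them for free.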
LGTM
Tests merged into main branch as aef4373. PR rebased.