
[RISCV] Fold SRLIW+SLLI+ADD into SRLIW+SHXADD #142611


Merged

pfusik merged 1 commit into llvm:main on Jun 4, 2025

Conversation

pfusik (Contributor) commented Jun 3, 2025

This handles RV64 SRLIW similarly to SRLI.

It also fixes the comments for the SRLI combines, which did not match the code.
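
For instance, in the new shl_and_gep test added by this patch, the pointer offset (i << 1) & 0x1FFFFFFF8 now selects under Zba to

    srliw  a1, a1, 2
    sh3add a0, a1, a0

instead of the three-instruction RV64I-style sequence

    srliw a1, a1, 2
    slli  a1, a1, 3
    add   a0, a0, a1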

llvmbot (Member) commented Jun 3, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)

Changes

This handles RV64 SRLIW similarly to SRLI.

It also fixes the comments for the SRLI combines, which did not match the code.


Full diff: https://github.com/llvm/llvm-project/pull/142611.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+33-23)
  • (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+141-3)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4f6aa41d1e03b..4dd53fdd0213d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3116,34 +3116,44 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
       else
         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
 
-      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
-      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
-      // followed by a SHXADD with c3 for the X amount.
       if (isShiftedMask_64(Mask)) {
         unsigned Leading = XLen - llvm::bit_width(Mask);
         unsigned Trailing = llvm::countr_zero(Mask);
-        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
-          SDLoc DL(N);
-          EVT VT = N.getValueType();
-          Val = SDValue(CurDAG->getMachineNode(
-                            RISCV::SRLI, DL, VT, N0.getOperand(0),
-                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
-                        0);
-          return true;
-        }
+        if (Trailing != ShAmt)
+          return false;
+
+        unsigned Opcode;
+        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+        // followed by a SHXADD with c3 for the X amount.
+        if (LeftShift && Leading == 0 && C2 < Trailing)
+          Opcode = RISCV::SRLI;
+        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
+        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
+        // followed by a SHXADD with c3 for the X amount.
+        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
+          Opcode = RISCV::SRLIW;
         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
-        // leading zeros and c3 trailing zeros. We can use an SRLI by C3
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
         // followed by a SHXADD using c3 for the X amount.
-        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
-          SDLoc DL(N);
-          EVT VT = N.getValueType();
-          Val = SDValue(
-              CurDAG->getMachineNode(
-                  RISCV::SRLI, DL, VT, N0.getOperand(0),
-                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
-              0);
-          return true;
-        }
+        else if (!LeftShift && Leading == C2)
+          Opcode = RISCV::SRLI;
+        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
+        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
+        // followed by a SHXADD using c3 for the X amount.
+        else if (!LeftShift && Leading == 32 + C2)
+          Opcode = RISCV::SRLIW;
+        else
+          return false;
+
+        SDLoc DL(N);
+        EVT VT = N.getValueType();
+        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
+        Val = SDValue(
+            CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
+                                   CurDAG->getTargetConstant(ShAmt, DL, VT)),
+            0);
+        return true;
       }
     } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
                isa<ConstantSDNode>(N0.getOperand(1))) {
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a4d3b80edbd58..9859b9c555dbd 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -3688,15 +3688,14 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
 ; RV64ZBA-LABEL: test_gep_gep_dont_crash:
 ; RV64ZBA:       # %bb.0:
 ; RV64ZBA-NEXT:    srliw a2, a2, 6
-; RV64ZBA-NEXT:    add a1, a2, a1
+; RV64ZBA-NEXT:    sh3add a0, a2, a0
 ; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ret
 ;
 ; RV64XANDESPERF-LABEL: test_gep_gep_dont_crash:
 ; RV64XANDESPERF:       # %bb.0:
 ; RV64XANDESPERF-NEXT:    srliw a2, a2, 6
-; RV64XANDESPERF-NEXT:    slli a2, a2, 3
-; RV64XANDESPERF-NEXT:    add a0, a0, a2
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a2
 ; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
 ; RV64XANDESPERF-NEXT:    ret
   %lshr = lshr i64 %a2, 6
@@ -4274,3 +4273,142 @@ entry:
   %and = and i64 %add, 4294967295
   ret i64 %and
 }
+
+define ptr @shl_and_gep(ptr %p, i64 %i) {
+; RV64I-LABEL: shl_and_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shl_and_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 2
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shl_and_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 2
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT:    ret
+  %shl = shl i64 %i, 1
+  %and = and i64 %shl, 8589934584
+  %gep = getelementptr i8, ptr %p, i64 %and
+  ret ptr %gep
+}
+
+define ptr @shr_and_gep(ptr %p, i64 %i) {
+; RV64I-LABEL: shr_and_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 6
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shr_and_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 6
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shr_and_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 6
+; RV64XANDESPERF-NEXT:    nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT:    ret
+  %lshr = lshr i64 %i, 6
+  %and = and i64 %lshr, 67108863
+  %gep = getelementptr i16, ptr %p, i64 %and
+  ret ptr %gep
+}
+
+define ptr @slt_select_gep(ptr %p, i32 %y) {
+; RV64I-LABEL: slt_select_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a1, 28
+; RV64I-NEXT:    andi a1, a1, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    addi a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: slt_select_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 31
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    addi a0, a0, 16
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: slt_select_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 31
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT:    addi a0, a0, 16
+; RV64XANDESPERF-NEXT:    ret
+  %cmp = icmp slt i32 %y, 0
+  %select = select i1 %cmp, i64 24, i64 16
+  %gep = getelementptr i8, ptr %p, i64 %select
+  ret ptr %gep
+}
+
+define i32 @shr_and_add(i32 %x, i32 %y) {
+; RV64I-LABEL: shr_and_add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 9
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: shr_and_add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    srliw a1, a1, 9
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    sext.w a0, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: shr_and_add:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 9
+; RV64XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; RV64XANDESPERF-NEXT:    sext.w a0, a0
+; RV64XANDESPERF-NEXT:    ret
+  %lshr = lshr i32 %y, 7
+  %and = and i32 %lshr, 33554428
+  %add = add i32 %x, %and
+  ret i32 %add
+}
+
+define ptr @udiv1280_gep(ptr %p, i16 zeroext %i) {
+; RV64I-LABEL: udiv1280_gep:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a2, 13
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    mul a1, a1, a2
+; RV64I-NEXT:    srliw a1, a1, 26
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: udiv1280_gep:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    lui a2, 13
+; RV64ZBA-NEXT:    addi a2, a2, -819
+; RV64ZBA-NEXT:    mul a1, a1, a2
+; RV64ZBA-NEXT:    srliw a1, a1, 26
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
+; RV64ZBA-NEXT:    ret
+;
+; RV64XANDESPERF-LABEL: udiv1280_gep:
+; RV64XANDESPERF:       # %bb.0:
+; RV64XANDESPERF-NEXT:    lui a2, 13
+; RV64XANDESPERF-NEXT:    addi a2, a2, -819
+; RV64XANDESPERF-NEXT:    mul a1, a1, a2
+; RV64XANDESPERF-NEXT:    srliw a1, a1, 26
+; RV64XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; RV64XANDESPERF-NEXT:    ret
+  %udiv = udiv i16 %i, 1280
+  %idx.ext = zext nneg i16 %udiv to i64
+  %add.ptr = getelementptr i64, ptr %p, i64 %idx.ext
+  ret ptr %add.ptr
+}
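
To see how the new Leading == 32 - C2 case fires, take the shl_and_gep test above: C2 = 1 (the shl amount) and Mask = 8589934584 = 0x1FFFFFFF8, a shifted mask with Leading = 31 = 32 - C2 leading zeros and Trailing = 3 = ShAmt trailing zeros. Selection therefore emits an SRLIW by Trailing - C2 = 2 followed by SH3ADD. The standalone C++ sketch below (hypothetical, not part of the patch) checks that this rewrite is value-preserving:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Hypothetical check, not part of the patch: the fold rewrites
    //   (x << 1) & 0x1FFFFFFF8              (SLLI + AND form)
    // as
    //   zext(lo32(x) >> 2) << 3             (SRLIW, then SH3ADD's scaling)
    int main() {
      for (uint64_t x : {0ull, 1ull, 0x7ull, 0xFFFFFFFFull,
                         0x123456789ABCDEFull, ~0ull}) {
        uint64_t before = (x << 1) & 0x1FFFFFFF8ull;
        uint64_t srliw = static_cast<uint32_t>(x) >> 2; // SRLIW zero-extends
        uint64_t after = srliw << 3; // "<< 3" models SH3ADD's index scaling
        assert(before == after);
      }
      return 0;
    }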

topperc (Collaborator) left a comment


LGTM

pfusik (Contributor, Author) commented Jun 4, 2025

Tests merged into the main branch as aef4373. PR rebased.

pfusik merged commit afa517a into llvm:main on Jun 4, 2025
11 checks passed