[RISCV][ISel] Optimize setcc with mask test idioms #147015
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-risc-v

Author: Yingwei Zheng (dtcxzyw)

Changes

As we are converting more comparisons/differences of pointers into those of offsets in InstCombine, the mask test idiom icmp eq/ne (and X, Mask), 0 may be more common in real-world programs. This patch eliminates the unnecessary srli instructions generated for this pattern. We have a similar optimization for RISCVISD::SELECT_CC/BR_CC: see llvm/lib/Target/RISCV/RISCVISelLowering.cpp, lines 2416 to 2446 at commit a89e232.

However, I cannot reuse the function translateSetCCForBranch due to some regressions caused by other DAGCombiner folds: main...dtcxzyw:llvm-project:rv-mask-test. So this patch defers the transformation to ISel.

Full diff: https://github.com/llvm/llvm-project/pull/147015.diff

6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e0321443ba2d4..0a6e1d6a7004d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1615,6 +1615,10 @@ def riscv_seteq : ComplexPattern<XLenVT, 1, "selectSETEQ", [setcc]>;
def : Pat<(riscv_seteq (XLenVT GPR:$rs1)), (SLTIU GPR:$rs1, 1)>;
def : Pat<(riscv_setne (XLenVT GPR:$rs1)), (SLTU (XLenVT X0), GPR:$rs1)>;
def : Pat<(XLenVT (setne (XLenVT GPR:$rs1), -1)), (SLTIU GPR:$rs1, -1)>;
+def : Pat<(XLenVT (seteq (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
+ (SLTIU (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))), 1)>;
+def : Pat<(XLenVT (setne (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
+ (SLTU (XLenVT X0), (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))))>;
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
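The two patterns above rely on helpers that are not shown in this hunk, TrailingOnesMask and XLenSubTrailingOnes, defined elsewhere in the backend. As a minimal sketch of the idea (an illustration with hypothetical names, not the patch's actual code): a trailing-ones mask M satisfies M != 0 and M & (M + 1) == 0, the SLLI amount is XLen minus the number of ones, and shifting out exactly the bits above the mask preserves the zero test:

#include <bit>
#include <cassert>
#include <cstdint>

// Sketch of the predicate/transform the patterns rely on (hypothetical
// names; the real definitions live in the RISC-V TableGen/C++ sources).
bool isTrailingOnesMask(uint64_t Imm) {
  // 0b0...01...1: adding 1 carries through all the low ones.
  return Imm != 0 && (Imm & (Imm + 1)) == 0;
}

unsigned xlenSubTrailingOnes(uint64_t Imm, unsigned XLen) {
  assert(isTrailingOnesMask(Imm));
  return XLen - std::popcount(Imm);
}

int main() {
  const uint64_t Mask = 0x3FFFFFFFFFFFFFFF; // 62 trailing ones, as in the i64 tests
  const unsigned Shift = xlenSubTrailingOnes(Mask, 64); // == 2
  const uint64_t Tests[] = {0, 1, Mask, Mask + 1, ~Mask, UINT64_MAX};
  for (uint64_t X : Tests)
    // The zero test is preserved: slli + seqz replaces srli + seqz.
    assert(((X & Mask) == 0) == ((X << Shift) == 0));
  return 0;
}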
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
index 88413291c26cd..7f387a763b6da 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
@@ -972,19 +972,19 @@ define i1 @fpclass(float %x) {
; RV32I-NEXT: slli a2, a0, 1
; RV32I-NEXT: lui a3, 2048
; RV32I-NEXT: lui a4, 1046528
-; RV32I-NEXT: srli a2, a2, 1
+; RV32I-NEXT: srli a5, a2, 1
; RV32I-NEXT: addi a3, a3, -1
-; RV32I-NEXT: addi a5, a2, -1
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a6, a5, a1
+; RV32I-NEXT: sltu a1, a1, a5
+; RV32I-NEXT: add a4, a5, a4
+; RV32I-NEXT: addi a5, a5, -1
; RV32I-NEXT: sltu a3, a5, a3
; RV32I-NEXT: lui a5, 520192
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: add a4, a2, a4
; RV32I-NEXT: sltu a4, a4, a5
-; RV32I-NEXT: xor a5, a2, a1
-; RV32I-NEXT: sltu a1, a1, a2
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: seqz a5, a6
; RV32I-NEXT: and a3, a3, a0
; RV32I-NEXT: or a2, a2, a5
; RV32I-NEXT: and a0, a4, a0
@@ -1000,19 +1000,19 @@ define i1 @fpclass(float %x) {
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a3, 1
; RV64I-NEXT: lui a4, 2048
-; RV64I-NEXT: lui a5, 520192
-; RV64I-NEXT: srli a2, a2, 33
-; RV64I-NEXT: addi a6, a4, -1
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: subw a3, a2, a3
-; RV64I-NEXT: sltu a3, a3, a6
-; RV64I-NEXT: xor a6, a2, a1
-; RV64I-NEXT: sltu a1, a1, a2
-; RV64I-NEXT: subw a4, a2, a4
+; RV64I-NEXT: srli a5, a2, 33
+; RV64I-NEXT: xor a0, a0, a5
+; RV64I-NEXT: subw a3, a5, a3
+; RV64I-NEXT: xor a6, a5, a1
+; RV64I-NEXT: sltu a1, a1, a5
+; RV64I-NEXT: subw a5, a5, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: sltu a3, a3, a4
+; RV64I-NEXT: lui a4, 520192
; RV64I-NEXT: seqz a2, a2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: seqz a6, a6
-; RV64I-NEXT: sltu a4, a4, a5
+; RV64I-NEXT: sltu a4, a5, a4
; RV64I-NEXT: and a3, a3, a0
; RV64I-NEXT: or a2, a2, a6
; RV64I-NEXT: or a1, a2, a1
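Note on the fpclass diffs (this file and the SelectionDAG version below): the new pattern lets seqz test the slli result directly, so the check abs(x) == 0, previously computed as (x << 1) >> 1 followed by seqz, becomes a seqz on the slli result itself, with the srli kept only for its remaining uses. The rest of the churn is register renaming and rescheduling; the instruction count in each hunk is unchanged.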
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index be9ddc68ce667..ed50042f54ab5 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -1625,18 +1625,18 @@ define i1 @fpclass(float %x) {
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 1
; RV32I-NEXT: lui a2, 2048
-; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: lui a3, 522240
; RV32I-NEXT: lui a4, 1046528
-; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: srli a5, a1, 1
; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: addi a5, a1, -1
+; RV32I-NEXT: xor a6, a5, a3
+; RV32I-NEXT: slt a3, a3, a5
+; RV32I-NEXT: add a4, a5, a4
+; RV32I-NEXT: addi a5, a5, -1
; RV32I-NEXT: sltu a2, a5, a2
-; RV32I-NEXT: xor a5, a1, a3
-; RV32I-NEXT: slt a3, a3, a1
-; RV32I-NEXT: add a4, a1, a4
+; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: seqz a5, a6
; RV32I-NEXT: srli a4, a4, 24
; RV32I-NEXT: and a2, a2, a0
; RV32I-NEXT: or a1, a1, a5
@@ -1649,29 +1649,29 @@ define i1 @fpclass(float %x) {
;
; RV64I-LABEL: fpclass:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: slli a0, a0, 33
+; RV64I-NEXT: slli a1, a0, 33
; RV64I-NEXT: lui a2, 2048
; RV64I-NEXT: lui a3, 522240
; RV64I-NEXT: lui a4, 1046528
-; RV64I-NEXT: srli a0, a0, 33
+; RV64I-NEXT: srli a5, a1, 33
; RV64I-NEXT: addi a2, a2, -1
-; RV64I-NEXT: slti a1, a1, 0
-; RV64I-NEXT: addi a5, a0, -1
+; RV64I-NEXT: xor a6, a5, a3
+; RV64I-NEXT: slt a3, a3, a5
+; RV64I-NEXT: add a4, a5, a4
+; RV64I-NEXT: addi a5, a5, -1
; RV64I-NEXT: sltu a2, a5, a2
-; RV64I-NEXT: xor a5, a0, a3
-; RV64I-NEXT: slt a3, a3, a0
-; RV64I-NEXT: add a4, a0, a4
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: seqz a5, a6
; RV64I-NEXT: srliw a4, a4, 24
-; RV64I-NEXT: and a2, a2, a1
-; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: and a2, a2, a0
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: sltiu a4, a4, 127
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a0, a4, a0
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
%cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
ret i1 %cmp
diff --git a/llvm/test/CodeGen/RISCV/i32-icmp.ll b/llvm/test/CodeGen/RISCV/i32-icmp.ll
index 6e3e0fe39cca7..53892f9497bba 100644
--- a/llvm/test/CodeGen/RISCV/i32-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/i32-icmp.ll
@@ -1136,3 +1136,57 @@ define i32 @icmp_sle_constant_neg_2050(i32 %a) nounwind {
%2 = zext i1 %1 to i32
ret i32 %2
}
+
+define i32 @mask_test_eq(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_eq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 12
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_eq:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: slli a0, a0, 12
+; RV32XQCILIA-NEXT: seqz a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 1048575
+ %cmp = icmp eq i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @mask_test_ne(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_ne:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 12
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_ne:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: slli a0, a0, 12
+; RV32XQCILIA-NEXT: snez a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 1048575
+ %cmp = icmp ne i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @mask_test_eq_simm12(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_eq_simm12:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_eq_simm12:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: andi a0, a0, 3
+; RV32XQCILIA-NEXT: seqz a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 3
+ %cmp = icmp eq i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
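A quick arithmetic check on the shift amounts above (an aside, not part of the patch): 1048575 is 2^20 - 1, a run of 20 trailing ones, so the RV32 shift amount is 32 - 20 = 12, matching slli a0, a0, 12. The mask 3 fits a signed 12-bit immediate, so andi plus seqz suffices and the shift form is not used.

// Compile-time restatement of the arithmetic above (illustrative only).
int main() {
  static_assert(1048575 == (1 << 20) - 1, "20 trailing ones");
  static_assert(32 - 20 == 12, "slli amount on RV32");
}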
diff --git a/llvm/test/CodeGen/RISCV/i64-icmp.ll b/llvm/test/CodeGen/RISCV/i64-icmp.ll
index 49103231a075f..837987d8b9162 100644
--- a/llvm/test/CodeGen/RISCV/i64-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/i64-icmp.ll
@@ -767,4 +767,56 @@ define i64 @icmp_ne_zext_inreg_umin(i64 %a) nounwind {
%4 = zext i1 %3 to i64
ret i64 %4
}
+
+define i64 @mask_test_eq(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_eq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_ne(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_ne:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ %cmp = icmp ne i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_eq_simm12(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_eq_simm12:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 3
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 3
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_eq_multiuse(i64 %x, ptr %p) nounwind {
+; RV64I-LABEL: mask_test_eq_multiuse:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: srli a2, a0, 2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: sd a2, 0(a1)
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ store i64 %y, ptr %p, align 8
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
declare i64 @llvm.umin.i64(i64, i64)
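In mask_test_eq_multiuse the masked value has a second use, so the and is still materialized, via slli plus srli since the 62-ones mask does not fit an immediate, and the zero test simply reuses the slli result for seqz; no extra instructions are spent on the compare.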
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 98c897084ab49..47b90a006a249 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -594,7 +594,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: slli a0, a2, 16
-; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sh a2, 0(a1)
; RV32-NEXT: ret
@@ -603,7 +602,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 48
-; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sh a2, 0(a1)
; RV64-NEXT: ret
@@ -759,10 +757,9 @@ define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
-; RV64-NEXT: slli a0, a2, 22
-; RV64-NEXT: srli a3, a0, 22
+; RV64-NEXT: slli a3, a2, 22
; RV64-NEXT: seqz a0, a3
-; RV64-NEXT: srli a3, a3, 32
+; RV64-NEXT: srli a3, a3, 54
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: sh a3, 4(a1)
; RV64-NEXT: ret
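The overflow-intrinsics changes show the same fold firing on shift pairs created when illegal types (i16, i42) are legalized: the zero test keeps the slli and drops the now-redundant srli before seqz, and in the i42 case the remaining field extraction folds srli by 22 followed by srli by 32 into a single srli by 54.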
LGTM