-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Fold LI 1 / SLLI into BSETI during i64 materialization #142348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Piotr Fusik (pfusik) Changes: My first approach was to avoid emitting LI 1 / SLLI in the first place. Another possible approach would be to keep LI 1 / SLLI if it is to be later replaced with SRLI. Full diff: https://github.com/llvm/llvm-project/pull/142348.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 8ea2548258fdb..f239129617b97 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -353,6 +353,13 @@ InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
} while (Hi != 0);
Res = TmpSeq;
}
+
+ // Fold LI 1 + SLLI into BSETI.
+ if (Res[0].getOpcode() == RISCV::ADDI && Res[0].getImm() == 1 &&
+ Res[1].getOpcode() == RISCV::SLLI) {
+ Res.erase(Res.begin()); // Remove ADDI.
+ Res.front() = Inst(RISCV::BSETI, Res.front().getImm()); // Patch SLLI.
+ }
}
// Perform optimization with BCLRI in the Zbs extension.
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index f324a9bc120ef..418407d9b7cd6 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -4637,3 +4637,127 @@ define i64 @imm64_0xFF7FFFFF7FFFFFFE() {
; RV64-REMAT-NEXT: ret
ret i64 -36028799166447617 ; 0xFF7FFFFF7FFFFFFE
}
+
+define i64 @imm64_0xFFFFFFFF0() {
+; RV32I-LABEL: imm64_0xFFFFFFFF0:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, -16
+; RV32I-NEXT: li a1, 15
+; RV32I-NEXT: ret
+;
+; RV32IXQCILI-LABEL: imm64_0xFFFFFFFF0:
+; RV32IXQCILI: # %bb.0:
+; RV32IXQCILI-NEXT: li a0, -16
+; RV32IXQCILI-NEXT: li a1, 15
+; RV32IXQCILI-NEXT: ret
+;
+; RV64I-LABEL: imm64_0xFFFFFFFF0:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: slli a0, a0, 36
+; RV64I-NEXT: addi a0, a0, -16
+; RV64I-NEXT: ret
+;
+; RV64IZBA-LABEL: imm64_0xFFFFFFFF0:
+; RV64IZBA: # %bb.0:
+; RV64IZBA-NEXT: li a0, 1
+; RV64IZBA-NEXT: slli a0, a0, 36
+; RV64IZBA-NEXT: addi a0, a0, -16
+; RV64IZBA-NEXT: ret
+;
+; RV64IZBB-LABEL: imm64_0xFFFFFFFF0:
+; RV64IZBB: # %bb.0:
+; RV64IZBB-NEXT: li a0, 1
+; RV64IZBB-NEXT: slli a0, a0, 36
+; RV64IZBB-NEXT: addi a0, a0, -16
+; RV64IZBB-NEXT: ret
+;
+; RV64IZBS-LABEL: imm64_0xFFFFFFFF0:
+; RV64IZBS: # %bb.0:
+; RV64IZBS-NEXT: bseti a0, zero, 36
+; RV64IZBS-NEXT: addi a0, a0, -16
+; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_0xFFFFFFFF0:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 36
+; RV64IXTHEADBB-NEXT: addi a0, a0, -16
+; RV64IXTHEADBB-NEXT: ret
+;
+; RV32-REMAT-LABEL: imm64_0xFFFFFFFF0:
+; RV32-REMAT: # %bb.0:
+; RV32-REMAT-NEXT: li a0, -16
+; RV32-REMAT-NEXT: li a1, 15
+; RV32-REMAT-NEXT: ret
+;
+; RV64-REMAT-LABEL: imm64_0xFFFFFFFF0:
+; RV64-REMAT: # %bb.0:
+; RV64-REMAT-NEXT: li a0, 1
+; RV64-REMAT-NEXT: slli a0, a0, 36
+; RV64-REMAT-NEXT: addi a0, a0, -16
+; RV64-REMAT-NEXT: ret
+ ret i64 68719476720 ; 0xFFFFFFFF0
+}
+
+define i64 @imm64_0x1FFFFFF08() {
+; RV32I-LABEL: imm64_0x1FFFFFF08:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, -248
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: ret
+;
+; RV32IXQCILI-LABEL: imm64_0x1FFFFFF08:
+; RV32IXQCILI: # %bb.0:
+; RV32IXQCILI-NEXT: li a0, -248
+; RV32IXQCILI-NEXT: li a1, 1
+; RV32IXQCILI-NEXT: ret
+;
+; RV64I-LABEL: imm64_0x1FFFFFF08:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: slli a0, a0, 33
+; RV64I-NEXT: addi a0, a0, -248
+; RV64I-NEXT: ret
+;
+; RV64IZBA-LABEL: imm64_0x1FFFFFF08:
+; RV64IZBA: # %bb.0:
+; RV64IZBA-NEXT: li a0, 1
+; RV64IZBA-NEXT: slli a0, a0, 33
+; RV64IZBA-NEXT: addi a0, a0, -248
+; RV64IZBA-NEXT: ret
+;
+; RV64IZBB-LABEL: imm64_0x1FFFFFF08:
+; RV64IZBB: # %bb.0:
+; RV64IZBB-NEXT: li a0, 1
+; RV64IZBB-NEXT: slli a0, a0, 33
+; RV64IZBB-NEXT: addi a0, a0, -248
+; RV64IZBB-NEXT: ret
+;
+; RV64IZBS-LABEL: imm64_0x1FFFFFF08:
+; RV64IZBS: # %bb.0:
+; RV64IZBS-NEXT: bseti a0, zero, 33
+; RV64IZBS-NEXT: addi a0, a0, -248
+; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm64_0x1FFFFFF08:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 33
+; RV64IXTHEADBB-NEXT: addi a0, a0, -248
+; RV64IXTHEADBB-NEXT: ret
+;
+; RV32-REMAT-LABEL: imm64_0x1FFFFFF08:
+; RV32-REMAT: # %bb.0:
+; RV32-REMAT-NEXT: li a0, -248
+; RV32-REMAT-NEXT: li a1, 1
+; RV32-REMAT-NEXT: ret
+;
+; RV64-REMAT-LABEL: imm64_0x1FFFFFF08:
+; RV64-REMAT: # %bb.0:
+; RV64-REMAT-NEXT: li a0, 1
+; RV64-REMAT-NEXT: slli a0, a0, 33
+; RV64-REMAT-NEXT: addi a0, a0, -248
+; RV64-REMAT-NEXT: ret
+ ret i64 8589934344 ; 0x1FFFFFF08
+}
|
if (Res[0].getOpcode() == RISCV::ADDI && Res[0].getImm() == 1 && | ||
Res[1].getOpcode() == RISCV::SLLI) { | ||
Res.erase(Res.begin()); // Remove ADDI. | ||
Res.front() = Inst(RISCV::BSETI, Res.front().getImm()); // Patch SLLI. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another option would be to add setOpcode().
✅ With the latest revision this PR passed the C/C++ code formatter. |
; ZBS64: # %bb.0: | ||
; ZBS64-NEXT: bseti a2, zero, 33 | ||
; ZBS64-NEXT: addi a2, a2, -2 | ||
; ZBS64-NEXT: orn a0, a0, a2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
My first approach was to avoid emitting LI 1 / SLLI in the first place. Unfortunately, that favors BSETI C / ADDI -1 over LI -1 / SRLI 64-C even though the latter has both instructions compressible. This is because the code assumes in several places that a two-instruction sequence (here: BSETI / ADDI) cannot be improved. Another possible approach would be to keep LI 1 / SLLI if it is to be later replaced with SRLI. This would be harder to grasp than simply patching LI 1 / SLLI with BSETI.
Tests merged into main branch as 405c31f. PR rebased. |
[RISCV] Fold LI 1 / SLLI into BSETI during i64 materialization (#142348) My first approach was to avoid emitting LI 1 / SLLI in the first place. Unfortunately, that favors BSETI C / ADDI -1 over LI -1 / SRLI 64-C even though the latter has both instructions compressible. This is because the code assumes in several places that a two-instruction sequence (here: BSETI / ADDI) cannot be improved. Another possible approach would be to keep LI 1 / SLLI if it is to be later replaced with SRLI. This would be harder to grasp than eventually patching LI 1 / SLLI with BSETI.
My first approach was to avoid emitting LI 1 / SLLI in the first place.
Unfortunately, that favors BSETI C / ADDI -1 over LI -1 / SRLI 64-C
even though the latter has both instructions compressible.
This is because the code assumes in several places that a two-instruction
sequence (here: BSETI / ADDI) cannot be improved.
Another possible approach would be to keep LI 1 / SLLI if it is to be
later replaced with SRLI. This would be harder to grasp than eventually
patching LI 1 / SLLI with BSETI.