-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AArch64] Remove copy in SVE/SME predicate spill and fill #81716
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Sam Tebbs (SamTebbs33) Changes7dc20ab introduced an extra COPY when spilling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch emits a new ConvertPNRtoPPR pseudo instruction instead. When this is expanded, it gets erased if the PNR is a subregister of the PPR, since the conversion is implicit, otherwise it is lowered to an ORR. Full diff: https://github.com/llvm/llvm-project/pull/81716.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 352c61d48e2fff..7dfd4ed5b38cff 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1119,6 +1119,23 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
default:
break;
+ case AArch64::ConvertPNRtoPPR: {
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineOperand DstMO = MI.getOperand(0);
+ MachineOperand SrcMO = MI.getOperand(1);
+ unsigned SrcReg = SrcMO.getReg();
+ if (!TRI->isSubRegister(DstMO.getReg(), SrcReg)) {
+ unsigned SrcSuperReg = TRI->getMatchingSuperReg(SrcReg, AArch64::psub,
+ &AArch64::PPRRegClass);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_PPzPP))
+ .add(DstMO)
+ .addReg(SrcSuperReg)
+ .addReg(SrcSuperReg)
+ .addReg(SrcSuperReg);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::BSPv8i8:
case AArch64::BSPv16i8: {
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 656259727c124f..01c3a1c92d07d7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4789,6 +4789,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
bool Offset = true;
MCRegister PNRReg = MCRegister::NoRegister;
unsigned StackID = TargetStackID::Default;
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4807,8 +4808,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Unexpected register store without SVE2p1 or SME2");
if (SrcReg.isVirtual()) {
auto NewSrcReg =
- MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
- BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
+ MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ConvertPNRtoPPR),
+ NewSrcReg)
.addReg(SrcReg);
SrcReg = NewSrcReg;
} else
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c4d69232c9e30e..92d4c3774908e2 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2308,6 +2308,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
} // End HasBF16, HasSVEorSME
+def ConvertPNRtoPPR : Pseudo<(outs PPRAny:$Pd), (ins PNRAny:$Pm), []>, Sched<[]>;
+
let Predicates = [HasSVEorSME] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
new file mode 100644
index 00000000000000..abbc1d615b66f7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -start-after=virtregrewriter -stop-after=aarch64-expand-pseudo -mattr=+sme2 -verify-machineinstrs -o - %s \
+# RUN: | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ define void @test_convert_different_reg() #0 { entry: unreachable }
+
+ attributes #0 = { "target-features"="+sme2" }
+
+---
+name: test_convert_different_reg
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_convert_different_reg
+ ; CHECK: renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+ ; CHECK-NEXT: renamable $p9 = ORR_PPzPP $p8, $p8, $p8
+ ; CHECK-NEXT: STR_PXI killed renamable $p9, $sp, 7
+ ; CHECK-NEXT: renamable $p0 = LDR_PXI $sp, 7
+ early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16
+ frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+ renamable $p9 = ConvertPNRtoPPR killed renamable $pn8
+ STR_PXI killed renamable $p9, $sp, 7
+ renamable $p0 = LDR_PXI $sp, 7
+ early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16
+ RET undef $lr
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index ef7d55a1c2395f..af062d33c5642f 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -213,8 +213,7 @@ body: |
; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
; EXPAND: renamable $pn8 = WHILEGE_CXX_B
- ; EXPAND: $p0 = ORR_PPzPP $p8, $p8, killed $p8
- ; EXPAND: STR_PXI killed renamable $p0, $sp, 7
+ ; EXPAND: STR_PXI killed renamable $p8, $sp, 7
;
; EXPAND: renamable $p0 = LDR_PXI $sp, 7
; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8
|
@@ -4807,8 +4808,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, | |||
"Unexpected register store without SVE2p1 or SME2"); | |||
if (SrcReg.isVirtual()) { | |||
auto NewSrcReg = | |||
MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass); | |||
BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg) | |||
MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changing the source register to p8to15 gives it the chance to overlap with the whilege in the test. Without this change the source register will be allocated to p0, as that is the first register in the PPR register class allocation order, and we can't elide the copy. If anyone else can think of a better approach to get regalloc to make the convert and whilege share registers (e.g. p8 and pn8 respectively), please do mention it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi; I just tried an experiment to constrain the register class to a common superclass instead of introducing a copy and eliding it later. This results in 'pn8' being stored instead of 'p8', but that may be allowable syntax given some discussions I had -- I haven't found much guidance on that. I'm not sure if this the the best approach, but it may be worth investigating.
If there are problems with it, we might want to revisit our current register class hierarchy at some point.
MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass); | |
if (SrcReg.isVirtual()) { | |
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass); | |
}... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like that approach, thanks Graham. It can also be used to get rid of the fill copy.
I'll implement this and adjust the tests accordingly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
You can test this locally with the following command:git-clang-format --diff 341d674b6f1863d027ed30c44a14cd32599eb42d 03aabd26e1f2c0b5c9360eb1f48102b69937a10c -- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c5dc2240cd..ab0ecc85a6 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6167,8 +6167,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
"4 registers apart, and the first register in the range [z0, z3] or "
"[z16, z19] and with correct element type");
case Match_AddSubLSLImm3ShiftLarge:
- return Error(Loc,
- "expected 'lsl' with optional integer in range [0, 7]");
+ return Error(Loc, "expected 'lsl' with optional integer in range [0, 7]");
default:
llvm_unreachable("unexpected error code!");
}
|
@@ -996,6 +996,16 @@ let Namespace = "AArch64" in { | |||
def psub1 : SubRegIndex<16, -1>; | |||
} | |||
|
|||
class PPRorPNRClass : RegisterClass< | |||
"AArch64", | |||
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, | |
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 | ||
; CHECK-NEXT: $w0 = COPY [[COPY]](s32) | ||
; CHECK-NEXT: RET_ReallyLR implicit $w0 | ||
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common | ||
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common | |
INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0:gpr32common |
(same in other places)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can only use the regex in a CHECK statement, not in the IR being tested.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I didn't spot the leading ;
!
In that case, I would suggest not using a regex for these numbers, because they must match the ones from the MIR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, I've changed the .mir tests to explicitly match the regclass numbers.
@@ -54,7 +54,7 @@ entry: | |||
define i32 @test_single_register_output() nounwind ssp { | |||
; CHECK-LABEL: name: test_single_register_output | |||
; CHECK: bb.1.entry: | |||
; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0 | |||
; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice if these changes (same for the other files) could be committed as an NFC change separate from this PR, and then have this PR rebased on top.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good to me 👍
def PPRorPNR : PPRorPNRClass; | ||
def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; | ||
def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
minor nit: strange formatting.
def PPRorPNR : PPRorPNRClass; | |
def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; | |
def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>; | |
def PPRorPNR : PPRorPNRClass; | |
def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; | |
def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>; |
let Size = 16; | ||
} | ||
def PPRorPNR : PPRorPNRClass; | ||
def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need to add a case statement for `Match_InvalidSVEPPRorPNRAnyReg to:
AArch64AsmParser::showMatchError
AArch64AsmParser::MatchAndEmitInstruction
@@ -6680,7 +6680,7 @@ multiclass sve_mem_z_spill<string asm> { | |||
} | |||
|
|||
class sve_mem_p_spill<string asm> | |||
: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), | |||
: I<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This removes the need for having the following InstAliases in AArch64SVEInstrInfo.td:
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]",
(LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
def : InstAlias<"ldr $Pt, [$Rn]",
(LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]",
(STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
def : InstAlias<"str $Pt, [$Rn]",
(STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;
Can you remove them?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
8623267
to
5d8fb1f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nit addressed.
let Size = 16; | ||
} | ||
def PPRorPNR : PPRorPNRClass; | ||
def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: still strange formatting with lots of spaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh right I missed those ones, thanks! I would have expected clang-format to address this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
7dc20ab introduced an extra COPY when spilling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch emits a new ConvertPNRtoPPR pseudo instruction instead. When this is expanded, it gets erased if the PNR is a subregister of the PPR, since the conversion is implicit, otherwise it is lowered to an ORR.
f830825
to
9e59c45
Compare
I've force-pushed to trigger the builds again and get rid of the unrelated mlir failure. This should be ready for another look with the new assembly parser functions, thanks! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates to this patch, this is starting to look good!
.addReg(SrcReg); | ||
SrcReg = NewSrcReg; | ||
} else | ||
if (SrcReg.isVirtual()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that you've made the instructions accept both p0-p15 and pn0-pn15, we can remove all the code that tries to handle PNR registers differently from P registers in storeRegToStackSlot
and loadRegFromStackSlot
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea, done.
Reg = Reg - AArch64::PN0 + AArch64::P0; | ||
Inst.addOperand(MCOperand::createReg(Reg)); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can addPNRasPPRRegOperands
be removed now? Or perhaps I should ask: is PNRasPPR
still required, or can those instructions that use it also use PPRorPNRRegOperand
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PPRasPNR
can be removed. Thanks for the idea.
assert(N == 1 && "Invalid number of operands!"); | ||
unsigned Reg = getReg(); | ||
// Normalise to PPR | ||
if (Reg >= AArch64::PN0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
if (Reg >= AArch64::PN0) | |
if (Reg >= AArch64::PN0 && Reg <= AArch64::PN15) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -741,6 +744,18 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, | |||
return Success; | |||
} | |||
|
|||
static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cleanup suggestion for a separate patch:
All these decoder classes looks rather identical, it would be nice to clean this up with something like this:
template <unsigned RegClassID, unsigned FirstReg, unsigned NumRegsInClass>
static DecodeStatus DecodeSimpleReg(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const MCDisassembler *Decoder) {
if (RegNo > (NumRegsInClass-1))
return Fail;
unsigned Register =
AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register + FirstReg));
return Success;
}
And then remove the other functions and just specify e.g. DecodeSimpleReg<AArch64::PPRorPNRRegClassID, 0, 16>
in the .td file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds good to me. We can do it in a separate patch.
else { | ||
bool IsPPR = AArch64::PPRRegClass.hasSubClassEq(RC); | ||
bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC); | ||
if (IsPPR || IsPNR) { | ||
assert((!IsPPR || Subtarget.hasSVEorSME()) && | ||
"Unexpected register store without SVE store instructions"); | ||
assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && | ||
"Unexpected register store without SVE2p1 or SME2"); | ||
Opc = AArch64::STR_PXI; | ||
StackID = TargetStackID::ScalableVector; | ||
if (SrcReg.isVirtual()) | ||
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass); | ||
else if (IsPNR) | ||
// Normalise to PPR | ||
SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that the LDR and STR instructions accept a predicate-as-counter register, we can remove this trick to change the register class and simplify this code to:
else { | |
bool IsPPR = AArch64::PPRRegClass.hasSubClassEq(RC); | |
bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC); | |
if (IsPPR || IsPNR) { | |
assert((!IsPPR || Subtarget.hasSVEorSME()) && | |
"Unexpected register store without SVE store instructions"); | |
assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && | |
"Unexpected register store without SVE2p1 or SME2"); | |
Opc = AArch64::STR_PXI; | |
StackID = TargetStackID::ScalableVector; | |
if (SrcReg.isVirtual()) | |
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass); | |
else if (IsPNR) | |
// Normalise to PPR | |
SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0; | |
} | |
else if (AArch64::PPRRegClass.hasSubClassEq(RC) || | |
AArch64::PNRRegClass.hasSubClassEq(RC)) { | |
assert(...); | |
Opc = AArch64::STR_PXI; | |
StackID = TargetStackID::ScalableVector; | |
} |
same for the other case in loadRegFromStackSlot
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right! Thanks. I've kept the IsPNR
and IsPPR
variables though so we can emit the right assertions.
|
||
if ((isSVEPredicateAsCounterReg<Class>() || | ||
isSVEPredicateVectorRegOfWidth<ElementWidth, Class>()) && | ||
(Reg.ElementWidth == ElementWidth)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: no parenthesis needed in:
(Reg.ElementWidth == ElementWidth)) | |
Reg.ElementWidth == ElementWidth) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -6714,7 +6750,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, | |||
case Match_InvalidSVEVectorListStrided4x16: | |||
case Match_InvalidSVEVectorListStrided4x32: | |||
case Match_InvalidSVEVectorListStrided4x64: | |||
case Match_InvalidSVEPNRasPPRPredicateBReg: | |||
case Match_InvalidSVEPPRorPNRBReg: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: should this be together with case Match_InvalidSVEPPRorPNRAnyReg
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we keep it here we retain the same error messages as the older reg class. I can move it up but it will require updating quite a few tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM wit little nit addressed!
assert((!IsPPR || Subtarget.hasSVEorSME()) && | ||
"Unexpected register store without SVE store instructions"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: this can be
assert((!IsPPR || Subtarget.hasSVEorSME()) && | |
"Unexpected register store without SVE store instructions"); | |
assert(Subtarget.hasSVEorSME() && | |
"Unexpected register store without SVE store instructions"); |
because both PPR and PNR require at least either SME or SVE. (same for the assert on line 4996)
In a previous PR llvm#81716, a new decoder function was added to llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp. During code review it was suggested that, as most of the decoder functions were very similar in structure, that they be refactored into a single, templated function. I have added the refactored function, removed the definitions of the replaced functions, and replaced the references to the replaced functions in AArch64Disassembler.cpp and llvm/lib/Target/AArch64/AArch64RegisterInfo.td. To reduce the number of duplicate references in AArch64RegisterInfo.td, I have also made a small change to llvm/utils/TableGen/DecoderEmitter.cpp.
…97412) In a previous PR #81716, a new decoder function was added to llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp. During code review it was suggested that, as most of the decoder functions were very similar in structure, that they be refactored into a single, templated function. I have added the refactored function, removed the definitions of the replaced functions, and replaced the references to the replaced functions in AArch64Disassembler.cpp and llvm/lib/Target/AArch64/AArch64RegisterInfo.td. To reduce the number of duplicate references in AArch64RegisterInfo.td, I have also made a small change to llvm/utils/TableGen/DecoderEmitter.cpp.
This removes a redundant 'COPY' instruction that llvm#81716 probably forgot to remove. This redundant COPY led to an issue because because code in LiveRangeSplitting expects that the instruction emitted by `loadRegFromStackSlot` is an instruction that accesses memory, which isn't the case for the COPY instruction.
This removes a redundant 'COPY' instruction that #81716 probably forgot to remove. This redundant COPY led to an issue because because code in LiveRangeSplitting expects that the instruction emitted by `loadRegFromStackSlot` is an instruction that accesses memory, which isn't the case for the COPY instruction.
This removes a redundant 'COPY' instruction that llvm#81716 probably forgot to remove. This redundant COPY led to an issue because because code in LiveRangeSplitting expects that the instruction emitted by `loadRegFromStackSlot` is an instruction that accesses memory, which isn't the case for the COPY instruction. (cherry picked from commit 91a3c6f)
This removes a redundant 'COPY' instruction that llvm#81716 probably forgot to remove. This redundant COPY led to an issue because because code in LiveRangeSplitting expects that the instruction emitted by `loadRegFromStackSlot` is an instruction that accesses memory, which isn't the case for the COPY instruction. (cherry picked from commit 91a3c6f)
7dc20ab introduced an extra COPY when spilling and filling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch constrains the register class given to the spill and fill so that the classes match and can be elided. The patch also adds a new register class that covers both PPR and PNR so that STR_PXI and LDR_PXI can take either of them.