-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[WIP][AMDGPU][MC] Support 128b rsrc reg in mimg instructions #139121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[WIP][AMDGPU][MC] Support 128b rsrc reg in mimg instructions #139121
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-mc Author: Jun Wang (jwanggit86). Changes: The r128 field in MIMG instructions indicates that the resource register [… description truncated in this capture]. Patch is 306.12 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/139121.diff. 29 files affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f6407479288c4..60114682b64f5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1781,6 +1781,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGMSAA(const MCInst &Inst);
+ bool validateMIMGR128(const MCInst &Inst, const OperandVector &Operands);
bool validateOpSel(const MCInst &Inst);
bool validateTrue16OpSel(const MCInst &Inst);
bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
@@ -4016,6 +4017,64 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
return false;
}
+/// Check that the r128 modifier agrees with the width of the resource
+/// register on an image instruction.
+///
+/// Exactly one of "the resource register is 256-bit" and "r128 is in effect"
+/// must hold. Instructions with none of the MIMGFlags bits set, and BVH
+/// instructions, are accepted without further checks.
+///
+/// \param Inst the matched instruction to validate.
+/// \param Operands the parsed operands; used only to locate diagnostics.
+/// \returns true if the combination is valid; otherwise reports an error and
+/// returns false.
+bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst,
+                                       const OperandVector &Operands) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  // Nothing to validate unless this is one of the image encodings.
+  if (!(Desc.TSFlags & MIMGFlags))
+    return true;
+
+  // image_bvh_intersect_ray instructions only support a 128-bit RSRC reg, so
+  // the r128/width pairing below does not apply to them.
+  if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
+    return true;
+
+  // The resource operand is named srsrc when the MIMG flag is set and rsrc
+  // for the remaining encodings covered by MIMGFlags.
+  AMDGPU::OpName RsrcName = AMDGPU::OpName::rsrc;
+  if (Desc.TSFlags & SIInstrFlags::MIMG)
+    RsrcName = AMDGPU::OpName::srsrc;
+  const int RsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcName);
+  assert(RsrcIdx != -1);
+
+  const auto RsrcReg = Inst.getOperand(RsrcIdx).getReg();
+
+  // The register counts as 256-bit if it belongs to SReg_256_XNULL or is one
+  // of the eight-register TTMP tuples listed explicitly below.
+  // NOTE(review): presumably these TTMP tuples are not matched by the class
+  // query above -- confirm.
+  bool Is256BitRsrc =
+      getMRI()->getRegClass(AMDGPU::SReg_256_XNULLRegClassID).contains(RsrcReg);
+  if (!Is256BitRsrc) {
+    switch (RsrcReg.id()) {
+    case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_vi:
+    case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_vi:
+    case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi:
+    case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_gfx9plus:
+    case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_gfx9plus:
+    case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus:
+      Is256BitRsrc = true;
+      break;
+    default:
+      break;
+    }
+  }
+
+  // r128 only counts when the target has the feature, the opcode carries the
+  // operand, and the operand is non-zero.
+  const int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
+  const bool R128Set = hasMIMG_R128() && R128Idx != -1 &&
+                       Inst.getOperand(R128Idx).getImm() != 0;
+
+  // Valid pairings: 256-bit register without r128, or 128-bit with r128.
+  if (Is256BitRsrc != R128Set)
+    return true;
+
+  if (Is256BitRsrc) {
+    // 256-bit register together with r128: point at the modifier.
+    Error(getImmLoc(AMDGPUOperand::ImmTyR128A16, Operands),
+          "r128 not allowed with 256-bit RSRC reg");
+    return false;
+  }
+
+  // 128-bit register without r128: the wording depends on whether the target
+  // has the r128 feature at all.
+  const auto Loc = getInstLoc(Operands);
+  Error(Loc, hasMIMG_R128()
+                 ? "the RSRC reg should be 256-bit, or the r128 flag is required"
+                 : "operands are not valid for this GPU or mode");
+  return false;
+}
+
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -5233,6 +5292,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid dim; must be MSAA type");
return false;
}
+ if (!validateMIMGR128(Inst, Operands))
+ return false;
+
if (!validateMIMGDataSize(Inst, IDLoc)) {
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 3fbba17159375..91240b8704256 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -286,6 +286,7 @@ DECODE_OPERAND_SREG_7(SReg_128, OPW128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, OPW128)
DECODE_OPERAND_SREG_7(SReg_256, OPW256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, OPW256)
+DECODE_OPERAND_SREG_7(SReg_RSRC, OPW256)
DECODE_OPERAND_SREG_7(SReg_512, OPW512)
DECODE_OPERAND_SREG_8(SReg_64, OPW64)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 8d94d73bc1aab..97f4820272ef0 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -424,7 +424,7 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -449,7 +449,7 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -462,7 +462,7 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -474,7 +474,7 @@ class MIMG_NoSampler_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -487,7 +487,7 @@ class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -500,7 +500,7 @@ class VIMAGE_NoSampler_gfx12<mimgopc op, string opcode,
string dns="">
: VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -512,7 +512,7 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc),
+ (ins SReg_RSRC:$rsrc),
!if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
@@ -529,7 +529,7 @@ class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc),
+ (ins SReg_RSRC:$rsrc),
!if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
@@ -681,7 +681,7 @@ class MIMG_Store_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns = "">
: MIMG_gfx6789<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -707,7 +707,7 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -721,7 +721,7 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -733,7 +733,7 @@ class MIMG_Store_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -747,7 +747,7 @@ class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
: MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -761,7 +761,7 @@ class VIMAGE_Store_gfx12<mimgopc op, string opcode,
: VIMAGE_gfx12<op.GFX12, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -877,7 +877,7 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
@@ -923,7 +923,7 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
!if(enableDisasm, "GFX10", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -938,7 +938,7 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -951,7 +951,7 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
!if(enableDisasm, "GFX11", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -966,7 +966,7 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -980,7 +980,7 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
@@ -1130,7 +1130,7 @@ multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_RSRC:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1151,7 +1151,7 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> {
dag ret = !con(OpPrefix,
- (ins SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
+ (ins SReg_RSRC:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(HasD16, (ins D16:$d16), (ins)));
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 35c7b393a8ca4..1b1130d351ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -905,6 +905,15 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16],
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}
+// Register class for the resource (srsrc/rsrc) operand of image instructions:
+// the union of SReg_256_XNULL and SReg_128_XNULL, so that either register
+// width is accepted. It is marked non-allocatable below.
+// NOTE(review): Size is set to 8 although the widest member class is
+// 256-bit; confirm which unit Size expects here (bits vs. dwords) -- TODO
+// verify against the RegisterClass definition.
+def SReg_RSRC : SIRegisterClass<"AMDGPU", [v8i32], 32,
+ (add SReg_256_XNULL, SReg_128_XNULL)> {
+ let Size = 8;
+ let CopyCost = -1;
+ let isAllocatable = 0;
+ let HasSGPR = 1;
+ let BaseClassOrder = 10000;
+}
+
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 96c9f40e317ea..cf2fd88405cec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
define i32 @asm_vgpr_early_clobber() {
; CHECK-LABEL: name: asm_vgpr_early_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 3211275 /* regdef-ec:VGPR_32 */, def early-clobber %8, 3211275 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
@@ -94,7 +94,7 @@ entry:
define i32 @test_single_vgpr_output() nounwind {
; CHECK-LABEL: name: test_single_vgpr_output
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 3211274 /* regdef:VGPR_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -106,7 +106,7 @@ entry:
define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-LABEL: name: test_single_sgpr_output_s32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 3342346 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -119,7 +119,7 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228234...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86). Changes: The r128 field in MIMG instructions indicates that the resource register [… description truncated in this capture]. Patch is 306.12 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/139121.diff. 29 files affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f6407479288c4..60114682b64f5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1781,6 +1781,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGMSAA(const MCInst &Inst);
+ bool validateMIMGR128(const MCInst &Inst, const OperandVector &Operands);
bool validateOpSel(const MCInst &Inst);
bool validateTrue16OpSel(const MCInst &Inst);
bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
@@ -4016,6 +4017,64 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
return false;
}
+/// Check that the r128 modifier agrees with the width of the resource
+/// register on an image instruction.
+///
+/// Exactly one of "the resource register is 256-bit" and "r128 is in effect"
+/// must hold. Instructions with none of the MIMGFlags bits set, and BVH
+/// instructions, are accepted without further checks.
+///
+/// \param Inst the matched instruction to validate.
+/// \param Operands the parsed operands; used only to locate diagnostics.
+/// \returns true if the combination is valid; otherwise reports an error and
+/// returns false.
+bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst,
+                                       const OperandVector &Operands) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  // Nothing to validate unless this is one of the image encodings.
+  if (!(Desc.TSFlags & MIMGFlags))
+    return true;
+
+  // image_bvh_intersect_ray instructions only support a 128-bit RSRC reg, so
+  // the r128/width pairing below does not apply to them.
+  if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
+    return true;
+
+  // The resource operand is named srsrc when the MIMG flag is set and rsrc
+  // for the remaining encodings covered by MIMGFlags.
+  AMDGPU::OpName RsrcName = AMDGPU::OpName::rsrc;
+  if (Desc.TSFlags & SIInstrFlags::MIMG)
+    RsrcName = AMDGPU::OpName::srsrc;
+  const int RsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcName);
+  assert(RsrcIdx != -1);
+
+  const auto RsrcReg = Inst.getOperand(RsrcIdx).getReg();
+
+  // The register counts as 256-bit if it belongs to SReg_256_XNULL or is one
+  // of the eight-register TTMP tuples listed explicitly below.
+  // NOTE(review): presumably these TTMP tuples are not matched by the class
+  // query above -- confirm.
+  bool Is256BitRsrc =
+      getMRI()->getRegClass(AMDGPU::SReg_256_XNULLRegClassID).contains(RsrcReg);
+  if (!Is256BitRsrc) {
+    switch (RsrcReg.id()) {
+    case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_vi:
+    case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_vi:
+    case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi:
+    case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_gfx9plus:
+    case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_gfx9plus:
+    case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus:
+      Is256BitRsrc = true;
+      break;
+    default:
+      break;
+    }
+  }
+
+  // r128 only counts when the target has the feature, the opcode carries the
+  // operand, and the operand is non-zero.
+  const int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
+  const bool R128Set = hasMIMG_R128() && R128Idx != -1 &&
+                       Inst.getOperand(R128Idx).getImm() != 0;
+
+  // Valid pairings: 256-bit register without r128, or 128-bit with r128.
+  if (Is256BitRsrc != R128Set)
+    return true;
+
+  if (Is256BitRsrc) {
+    // 256-bit register together with r128: point at the modifier.
+    Error(getImmLoc(AMDGPUOperand::ImmTyR128A16, Operands),
+          "r128 not allowed with 256-bit RSRC reg");
+    return false;
+  }
+
+  // 128-bit register without r128: the wording depends on whether the target
+  // has the r128 feature at all.
+  const auto Loc = getInstLoc(Operands);
+  Error(Loc, hasMIMG_R128()
+                 ? "the RSRC reg should be 256-bit, or the r128 flag is required"
+                 : "operands are not valid for this GPU or mode");
+  return false;
+}
+
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -5233,6 +5292,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid dim; must be MSAA type");
return false;
}
+ if (!validateMIMGR128(Inst, Operands))
+ return false;
+
if (!validateMIMGDataSize(Inst, IDLoc)) {
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 3fbba17159375..91240b8704256 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -286,6 +286,7 @@ DECODE_OPERAND_SREG_7(SReg_128, OPW128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, OPW128)
DECODE_OPERAND_SREG_7(SReg_256, OPW256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, OPW256)
+DECODE_OPERAND_SREG_7(SReg_RSRC, OPW256)
DECODE_OPERAND_SREG_7(SReg_512, OPW512)
DECODE_OPERAND_SREG_8(SReg_64, OPW64)
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 8d94d73bc1aab..97f4820272ef0 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -424,7 +424,7 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -449,7 +449,7 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -462,7 +462,7 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -474,7 +474,7 @@ class MIMG_NoSampler_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -487,7 +487,7 @@ class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -500,7 +500,7 @@ class VIMAGE_NoSampler_gfx12<mimgopc op, string opcode,
string dns="">
: VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -512,7 +512,7 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc),
+ (ins SReg_RSRC:$rsrc),
!if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
@@ -529,7 +529,7 @@ class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256_XNULL:$rsrc),
+ (ins SReg_RSRC:$rsrc),
!if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
@@ -681,7 +681,7 @@ class MIMG_Store_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns = "">
: MIMG_gfx6789<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -707,7 +707,7 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -721,7 +721,7 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -733,7 +733,7 @@ class MIMG_Store_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -747,7 +747,7 @@ class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
: MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -761,7 +761,7 @@ class VIMAGE_Store_gfx12<mimgopc op, string opcode,
: VIMAGE_gfx12<op.GFX12, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -877,7 +877,7 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_RSRC:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
@@ -923,7 +923,7 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
!if(enableDisasm, "GFX10", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -938,7 +938,7 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -951,7 +951,7 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
!if(enableDisasm, "GFX11", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_RSRC:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -966,7 +966,7 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ (ins SReg_RSRC:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -980,7 +980,7 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_RSRC:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
@@ -1130,7 +1130,7 @@ multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_RSRC:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1151,7 +1151,7 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> {
dag ret = !con(OpPrefix,
- (ins SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
+ (ins SReg_RSRC:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(HasD16, (ins D16:$d16), (ins)));
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 35c7b393a8ca4..1b1130d351ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -905,6 +905,15 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16],
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}
+def SReg_RSRC : SIRegisterClass<"AMDGPU", [v8i32], 32,
+ (add SReg_256_XNULL, SReg_128_XNULL)> {
+ let Size = 8;
+ let CopyCost = -1;
+ let isAllocatable = 0;
+ let HasSGPR = 1;
+ let BaseClassOrder = 10000;
+}
+
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 96c9f40e317ea..cf2fd88405cec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
define i32 @asm_vgpr_early_clobber() {
; CHECK-LABEL: name: asm_vgpr_early_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 3211275 /* regdef-ec:VGPR_32 */, def early-clobber %8, 3211275 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
@@ -94,7 +94,7 @@ entry:
define i32 @test_single_vgpr_output() nounwind {
; CHECK-LABEL: name: test_single_vgpr_output
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 3211274 /* regdef:VGPR_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -106,7 +106,7 @@ entry:
define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-LABEL: name: test_single_sgpr_output_s32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2359306 /* regdef:SReg_32 */, def %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 3342346 /* regdef:SReg_32 */, def %8
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -119,7 +119,7 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 2228234 /* regdef:VGPR_32 */, def %8, 2228234...
[truncated]
|
This is an alternative solution to the problem addressed by PR 132264. One concern with PR 132264 is that it nearly doubles the number of MIMG instructions (from ~14000 to roughly twice that). In this solution, a new register class, SReg_RSRC, is created that contains both 256b and 128b SRegs, so the number of MIMG instructions does not increase. |
The r128 field in MIMG instructions indicates that the resource register is 128-bit. However, the assembler rejects instructions with a 128-bit resource register even when r128 is present. This patch fixes this problem.
20165d3
to
cc05ff1
Compare
auto Loc = getImmLoc(AMDGPUOperand::ImmTyR128A16, Operands); | ||
Error(Loc, "r128 not allowed with 256-bit RSRC reg"); | ||
return false; | ||
} else if (SrsrcRegSize == 4 && !IsR128) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No else after return
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
switch (RsrcReg.id()) { | ||
case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_vi: | ||
case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_vi: | ||
case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi: | ||
case TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_gfx9plus: | ||
case TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_gfx9plus: | ||
case TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus: | ||
SrsrcRegSize = 8; | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there ought to be a class test you can perform for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In getMCReg(), registers such as TTMP0_TTMP1..._TTMP7 are converted to the ones with a suffix "_vi" or "_gfx9plus". Those new registers don't seem to belong to any reg class.
auto Loc = getInstLoc(Operands); | ||
if (hasMIMG_R128()) | ||
Error(Loc, | ||
"the RSRC reg should be 256-bit, or the r128 flag is required"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"the RSRC reg should be 256-bit, or the r128 flag is required"); | |
"rsrc reg should be 256-bit, or the r128 flag is required"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
✅ With the latest revision this PR passed the C/C++ code formatter. |
The r128 field in MIMG instructions indicates that the resource register
is 128-bit. However, the assembler rejects instructions with a 128-bit
resource register even when r128 is present. This patch fixes this problem.