Skip to content

Commit 04f6f25

Browse files
committed
[AMDGPU] Fix bug when calculating user_sgpr_count for Code Object V3 assembler
Stop counting explicitly disabled user SGPRs in the user_sgpr_count field of the kernel descriptor. Differential Revision: https://reviews.llvm.org/D66900 llvm-svn: 370250
1 parent bf47734 commit 04f6f25

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3617,37 +3617,44 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
36173617
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36183618
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
36193619
Val, ValRange);
3620-
UserSGPRCount += 4;
3620+
if (Val)
3621+
UserSGPRCount += 4;
36213622
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
36223623
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36233624
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
36243625
ValRange);
3625-
UserSGPRCount += 2;
3626+
if (Val)
3627+
UserSGPRCount += 2;
36263628
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
36273629
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36283630
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
36293631
ValRange);
3630-
UserSGPRCount += 2;
3632+
if (Val)
3633+
UserSGPRCount += 2;
36313634
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
36323635
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36333636
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
36343637
Val, ValRange);
3635-
UserSGPRCount += 2;
3638+
if (Val)
3639+
UserSGPRCount += 2;
36363640
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
36373641
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36383642
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
36393643
ValRange);
3640-
UserSGPRCount += 2;
3644+
if (Val)
3645+
UserSGPRCount += 2;
36413646
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
36423647
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36433648
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
36443649
ValRange);
3645-
UserSGPRCount += 2;
3650+
if (Val)
3651+
UserSGPRCount += 2;
36463652
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
36473653
PARSE_BITS_ENTRY(KD.kernel_code_properties,
36483654
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
36493655
Val, ValRange);
3650-
UserSGPRCount += 1;
3656+
if (Val)
3657+
UserSGPRCount += 1;
36513658
} else if (ID == ".amdhsa_wavefront_size32") {
36523659
if (IVersion.Major < 10)
36533660
return getParser().Error(IDRange.Start, "directive requires gfx10+",

llvm/test/MC/AMDGPU/hsa-v3.s

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,19 @@
88

99
// READOBJ: Section Headers
1010
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
11-
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
11+
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
1212

1313
// READOBJ: Relocation section '.rela.rodata' at offset
1414
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
1515
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
1616
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
17+
// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
1718

1819
// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
1920
// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
2021
// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
22+
// READOBJ: {{[0-9]+}}: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
23+
// READOBJ: {{[0-9]+}}: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
2124
// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
2225
// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
2326
// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
@@ -40,6 +43,11 @@
4043
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
4144
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
4245
// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
46+
// disabled_user_sgpr
47+
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
48+
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
49+
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
50+
// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
4351

4452
.text
4553
// ASM: .text
@@ -62,6 +70,11 @@ complete:
6270
special_sgpr:
6371
s_endpgm
6472

73+
.p2align 8
74+
.type disabled_user_sgpr,@function
75+
disabled_user_sgpr:
76+
s_endpgm
77+
6578
.rodata
6679
// ASM: .rodata
6780

@@ -182,6 +195,20 @@ special_sgpr:
182195
// ASM-NEXT: .amdhsa_ieee_mode 0
183196
// ASM: .end_amdhsa_kernel
184197

198+
// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
199+
// count, i.e. this should produce the same descriptor as minimal.
200+
.p2align 6
201+
.amdhsa_kernel disabled_user_sgpr
202+
.amdhsa_user_sgpr_private_segment_buffer 0
203+
.amdhsa_next_free_vgpr 0
204+
.amdhsa_next_free_sgpr 0
205+
.end_amdhsa_kernel
206+
207+
// ASM: .amdhsa_kernel disabled_user_sgpr
208+
// ASM: .amdhsa_next_free_vgpr 0
209+
// ASM-NEXT: .amdhsa_next_free_sgpr 0
210+
// ASM: .end_amdhsa_kernel
211+
185212
.section .foo
186213

187214
.byte .amdgcn.gfx_generation_number

0 commit comments

Comments
 (0)