Skip to content

Commit d00833e

Browse files
committed
Factor out the common code for emitting hidden kernel argument metadata.
1 parent c4bc0cd commit d00833e

File tree

2 files changed

+44
-162
lines changed

2 files changed

+44
-162
lines changed

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 34 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,13 @@ void MetadataStreamerMsgPackV5::emitVersion() {
631631
getRootMetadata("amdhsa.version") = Version;
632632
}
633633

634+
void MetadataStreamerMsgPackV5::emitHiddenKernelArg(
635+
const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
636+
unsigned &Offset, msgpack::ArrayDocNode Args,
637+
KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
638+
emitKernelArgImpl(DL, ArgTy, Alignment, ArgName, Offset, Args);
639+
}
640+
634641
void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
635642
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
636643
auto &Func = MF.getFunction();
@@ -649,20 +656,27 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
649656
auto *Int16Ty = Type::getInt16Ty(Func.getContext());
650657

651658
Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
652-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset,
653-
Args);
654-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset,
655-
Args);
656-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset,
657-
Args);
658-
659-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
660-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
661-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
662-
663-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
664-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
665-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
659+
const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo();
660+
emitHiddenKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset,
661+
Args, KernArgPreload::HIDDEN_BLOCK_COUNT_X, &ArgInfo);
662+
emitHiddenKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset,
663+
Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Y, &ArgInfo);
664+
emitHiddenKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset,
665+
Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Z, &ArgInfo);
666+
667+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset,
668+
Args, KernArgPreload::HIDDEN_GROUP_SIZE_X, &ArgInfo);
669+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset,
670+
Args, KernArgPreload::HIDDEN_GROUP_SIZE_Y, &ArgInfo);
671+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset,
672+
Args, KernArgPreload::HIDDEN_GROUP_SIZE_Z, &ArgInfo);
673+
674+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args,
675+
KernArgPreload::HIDDEN_REMAINDER_X, &ArgInfo);
676+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args,
677+
KernArgPreload::HIDDEN_REMAINDER_Y, &ArgInfo);
678+
emitHiddenKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args,
679+
KernArgPreload::HIDDEN_REMAINDER_Z, &ArgInfo);
666680

667681
// Reserved for hidden_tool_correlation_id.
668682
Offset += 8;
@@ -768,13 +782,14 @@ void MetadataStreamerMsgPackV6::emitVersion() {
768782
getRootMetadata("amdhsa.version") = Version;
769783
}
770784

771-
void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
772-
const DataLayout &DL, Type *ArgTy, Align Alignment,
773-
KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
774-
msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
785+
void MetadataStreamerMsgPackV6::emitHiddenKernelArg(
786+
const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
787+
unsigned &Offset, msgpack::ArrayDocNode Args,
788+
KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
789+
assert(ArgInfo && HiddenArg != KernArgPreload::END_HIDDEN_ARGS);
775790

776791
SmallString<16> PreloadStr;
777-
const auto *PreloadDesc = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg);
792+
const auto *PreloadDesc = ArgInfo->getHiddenArgPreloadDescriptor(HiddenArg);
778793
if (PreloadDesc) {
779794
const auto &Regs = PreloadDesc->Regs;
780795
for (unsigned I = 0; I < Regs.size(); ++I) {
@@ -786,140 +801,6 @@ void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
786801
emitKernelArgImpl(DL, ArgTy, Alignment, ArgName, Offset, Args, PreloadStr);
787802
}
788803

789-
void MetadataStreamerMsgPackV6::emitHiddenKernelArgs(
790-
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
791-
auto &Func = MF.getFunction();
792-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
793-
794-
// No implicit kernel argument is used.
795-
if (ST.getImplicitArgNumBytes(Func) == 0)
796-
return;
797-
798-
const Module *M = Func.getParent();
799-
auto &DL = M->getDataLayout();
800-
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
801-
802-
auto *Int64Ty = Type::getInt64Ty(Func.getContext());
803-
auto *Int32Ty = Type::getInt32Ty(Func.getContext());
804-
auto *Int16Ty = Type::getInt16Ty(Func.getContext());
805-
806-
Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
807-
808-
const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo();
809-
emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
810-
KernArgPreload::HIDDEN_BLOCK_COUNT_X,
811-
"hidden_block_count_x", Offset, Args, ArgInfo);
812-
emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
813-
KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
814-
"hidden_block_count_y", Offset, Args, ArgInfo);
815-
emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
816-
KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
817-
"hidden_block_count_z", Offset, Args, ArgInfo);
818-
819-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
820-
KernArgPreload::HIDDEN_GROUP_SIZE_X,
821-
"hidden_group_size_x", Offset, Args, ArgInfo);
822-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
823-
KernArgPreload::HIDDEN_GROUP_SIZE_Y,
824-
"hidden_group_size_y", Offset, Args, ArgInfo);
825-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
826-
KernArgPreload::HIDDEN_GROUP_SIZE_Z,
827-
"hidden_group_size_z", Offset, Args, ArgInfo);
828-
829-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
830-
KernArgPreload::HIDDEN_REMAINDER_X,
831-
"hidden_remainder_x", Offset, Args, ArgInfo);
832-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
833-
KernArgPreload::HIDDEN_REMAINDER_Y,
834-
"hidden_remainder_y", Offset, Args, ArgInfo);
835-
emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
836-
KernArgPreload::HIDDEN_REMAINDER_Z,
837-
"hidden_remainder_z", Offset, Args, ArgInfo);
838-
839-
// Reserved for hidden_tool_correlation_id.
840-
Offset += 8;
841-
842-
Offset += 8; // Reserved.
843-
844-
emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
845-
Args);
846-
emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
847-
Args);
848-
emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
849-
Args);
850-
851-
emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
852-
853-
Offset += 6; // Reserved.
854-
auto *Int8PtrTy =
855-
PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
856-
857-
if (M->getNamedMetadata("llvm.printf.fmts")) {
858-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
859-
Args);
860-
} else {
861-
Offset += 8; // Skipped.
862-
}
863-
864-
if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
865-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
866-
Args);
867-
} else {
868-
Offset += 8; // Skipped.
869-
}
870-
871-
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
872-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg",
873-
Offset, Args);
874-
} else {
875-
Offset += 8; // Skipped.
876-
}
877-
878-
if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
879-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
880-
else
881-
Offset += 8; // Skipped.
882-
883-
if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
884-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
885-
Args);
886-
} else {
887-
Offset += 8; // Skipped.
888-
}
889-
890-
if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
891-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_completion_action",
892-
Offset, Args);
893-
} else {
894-
Offset += 8; // Skipped.
895-
}
896-
897-
// Emit the argument for the hidden dynamic LDS size.
898-
if (MFI.isDynamicLDSUsed()) {
899-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
900-
Args);
901-
} else {
902-
Offset += 4; // skipped
903-
}
904-
905-
Offset += 68; // Reserved.
906-
907-
// hidden_private_base and hidden_shared_base are only emitted when the
908-
// subtarget does not have ApertureRegs.
909-
if (!ST.hasApertureRegs()) {
910-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_private_base", Offset,
911-
Args);
912-
emitKernelArgImpl(DL, Int32Ty, Align(4), "hidden_shared_base", Offset,
913-
Args);
914-
} else {
915-
Offset += 8; // Skipped.
916-
}
917-
918-
if (MFI.getUserSGPRInfo().hasQueuePtr())
919-
emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset,
920-
Args);
921-
}
922-
923804
void MetadataStreamerMsgPackV6::emitKernelArg(const Argument &Arg,
924805
unsigned &Offset,
925806
msgpack::ArrayDocNode Args,

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
162162
msgpack::ArrayDocNode Args) override;
163163
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
164164
msgpack::MapDocNode Kern) override;
165+
virtual void emitHiddenKernelArg(
166+
const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
167+
unsigned &Offset, msgpack::ArrayDocNode Args,
168+
KernArgPreload::HiddenArg HiddenArg = KernArgPreload::END_HIDDEN_ARGS,
169+
const AMDGPUFunctionArgInfo *ArgInfo = nullptr);
165170

166171
public:
167172
MetadataStreamerMsgPackV5() = default;
@@ -171,18 +176,14 @@ class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
171176
class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 {
172177
protected:
173178
void emitVersion() override;
174-
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
175-
msgpack::ArrayDocNode Args) override;
176179
void emitKernelArg(const Argument &Arg, unsigned &Offset,
177180
msgpack::ArrayDocNode Args,
178181
const MachineFunction &MF) override;
179-
180-
void emitHiddenKernelArgWithPreload(const DataLayout &DL, Type *ArgTy,
181-
Align Alignment,
182-
KernArgPreload::HiddenArg HiddenArg,
183-
StringRef ArgName, unsigned &Offset,
184-
msgpack::ArrayDocNode Args,
185-
const AMDGPUFunctionArgInfo &ArgInfo);
182+
void emitHiddenKernelArg(
183+
const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
184+
unsigned &Offset, msgpack::ArrayDocNode Args,
185+
KernArgPreload::HiddenArg HiddenArg = KernArgPreload::END_HIDDEN_ARGS,
186+
const AMDGPUFunctionArgInfo *ArgInfo = nullptr) override;
186187

187188
public:
188189
MetadataStreamerMsgPackV6() = default;

0 commit comments

Comments
 (0)