@@ -631,6 +631,13 @@ void MetadataStreamerMsgPackV5::emitVersion() {
631631 getRootMetadata (" amdhsa.version" ) = Version;
632632}
633633
// V5 variant of emitHiddenKernelArg: forwards the type, alignment, name,
// offset and argument list to emitKernelArgImpl. HiddenArg and ArgInfo are
// accepted by the signature but are not used by this body.
634+ void MetadataStreamerMsgPackV5::emitHiddenKernelArg (
635+ const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
636+ unsigned &Offset, msgpack::ArrayDocNode Args,
637+ KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
638+ emitKernelArgImpl (DL, ArgTy, Alignment, ArgName, Offset, Args);
639+ }
640+
634641void MetadataStreamerMsgPackV5::emitHiddenKernelArgs (
635642 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
636643 auto &Func = MF.getFunction ();
@@ -649,20 +656,27 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
649656 auto *Int16Ty = Type::getInt16Ty (Func.getContext ());
650657
651658 Offset = alignTo (Offset, ST.getAlignmentForImplicitArgPtr ());
652- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_x" , Offset,
653- Args);
654- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_y" , Offset,
655- Args);
656- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_z" , Offset,
657- Args);
658-
659- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_x" , Offset, Args);
660- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_y" , Offset, Args);
661- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_z" , Offset, Args);
662-
663- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_x" , Offset, Args);
664- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_y" , Offset, Args);
665- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_z" , Offset, Args);
659+ const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo ();
660+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_x" , Offset,
661+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_X, &ArgInfo);
662+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_y" , Offset,
663+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Y, &ArgInfo);
664+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_z" , Offset,
665+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Z, &ArgInfo);
666+
667+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_x" , Offset,
668+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_X, &ArgInfo);
669+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_y" , Offset,
670+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_Y, &ArgInfo);
671+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_z" , Offset,
672+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_Z, &ArgInfo);
673+
674+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_x" , Offset, Args,
675+ KernArgPreload::HIDDEN_REMAINDER_X, &ArgInfo);
676+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_y" , Offset, Args,
677+ KernArgPreload::HIDDEN_REMAINDER_Y, &ArgInfo);
678+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_z" , Offset, Args,
679+ KernArgPreload::HIDDEN_REMAINDER_Z, &ArgInfo);
666680
667681 // Reserved for hidden_tool_correlation_id.
668682 Offset += 8 ;
@@ -768,13 +782,14 @@ void MetadataStreamerMsgPackV6::emitVersion() {
768782 getRootMetadata (" amdhsa.version" ) = Version;
769783}
770784
771- void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload (
772- const DataLayout &DL, Type *ArgTy, Align Alignment,
773- KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
774- msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
785+ void MetadataStreamerMsgPackV6::emitHiddenKernelArg (
786+ const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
787+ unsigned &Offset, msgpack::ArrayDocNode Args,
788+ KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
789+ assert (ArgInfo && HiddenArg != KernArgPreload::END_HIDDEN_ARGS);
775790
776791 SmallString<16 > PreloadStr;
777- const auto *PreloadDesc = ArgInfo. getHiddenArgPreloadDescriptor (HiddenArg);
792+ const auto *PreloadDesc = ArgInfo-> getHiddenArgPreloadDescriptor (HiddenArg);
778793 if (PreloadDesc) {
779794 const auto &Regs = PreloadDesc->Regs ;
780795 for (unsigned I = 0 ; I < Regs.size (); ++I) {
@@ -786,140 +801,6 @@ void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
786801 emitKernelArgImpl (DL, ArgTy, Alignment, ArgName, Offset, Args, PreloadStr);
787802}
788803
// Every line of this definition carries the diff's removal marker ('-').
// Emits the hidden kernel arguments for V6 into Args. Returns early when
// ST.getImplicitArgNumBytes(Func) is 0; otherwise aligns Offset to
// ST.getAlignmentForImplicitArgPtr() and emits the arguments in a fixed order,
// bumping Offset by hand for reserved or skipped slots.
789- void MetadataStreamerMsgPackV6::emitHiddenKernelArgs (
790- const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
791- auto &Func = MF.getFunction ();
792- const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
793-
794- // No implicit kernel argument is used.
795- if (ST.getImplicitArgNumBytes (Func) == 0 )
796- return ;
797-
798- const Module *M = Func.getParent ();
799- auto &DL = M->getDataLayout ();
800- const SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
801-
802- auto *Int64Ty = Type::getInt64Ty (Func.getContext ());
803- auto *Int32Ty = Type::getInt32Ty (Func.getContext ());
804- auto *Int16Ty = Type::getInt16Ty (Func.getContext ());
805-
806- Offset = alignTo (Offset, ST.getAlignmentForImplicitArgPtr ());
807-
// The nine block-count / group-size / remainder arguments go through
// emitHiddenKernelArgWithPreload, which also receives ArgInfo and the matching
// KernArgPreload enumerator; all later arguments use emitKernelArgImpl.
808- const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo ();
809- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
810- KernArgPreload::HIDDEN_BLOCK_COUNT_X,
811- " hidden_block_count_x" , Offset, Args, ArgInfo);
812- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
813- KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
814- " hidden_block_count_y" , Offset, Args, ArgInfo);
815- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
816- KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
817- " hidden_block_count_z" , Offset, Args, ArgInfo);
818-
819- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
820- KernArgPreload::HIDDEN_GROUP_SIZE_X,
821- " hidden_group_size_x" , Offset, Args, ArgInfo);
822- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
823- KernArgPreload::HIDDEN_GROUP_SIZE_Y,
824- " hidden_group_size_y" , Offset, Args, ArgInfo);
825- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
826- KernArgPreload::HIDDEN_GROUP_SIZE_Z,
827- " hidden_group_size_z" , Offset, Args, ArgInfo);
828-
829- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
830- KernArgPreload::HIDDEN_REMAINDER_X,
831- " hidden_remainder_x" , Offset, Args, ArgInfo);
832- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
833- KernArgPreload::HIDDEN_REMAINDER_Y,
834- " hidden_remainder_y" , Offset, Args, ArgInfo);
835- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
836- KernArgPreload::HIDDEN_REMAINDER_Z,
837- " hidden_remainder_z" , Offset, Args, ArgInfo);
838-
839- // Reserved for hidden_tool_correlation_id.
840- Offset += 8 ;
841-
842- Offset += 8 ; // Reserved.
843-
844- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_x" , Offset,
845- Args);
846- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_y" , Offset,
847- Args);
848- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_z" , Offset,
849- Args);
850-
851- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_grid_dims" , Offset, Args);
852-
853- Offset += 6 ; // Reserved.
// Pointer type in the AMDGPUAS::GLOBAL_ADDRESS address space, used as the
// argument type for the buffer/pointer arguments emitted below.
854- auto *Int8PtrTy =
855- PointerType::get (Func.getContext (), AMDGPUAS::GLOBAL_ADDRESS);
856-
// Each conditional below either emits its argument or skips it by adding a
// fixed amount to Offset. The printf buffer is keyed on the module's
// "llvm.printf.fmts" named metadata; the following ones are keyed on the
// absence of an "amdgpu-no-*" function attribute.
857- if (M->getNamedMetadata (" llvm.printf.fmts" )) {
858- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_printf_buffer" , Offset,
859- Args);
860- } else {
861- Offset += 8 ; // Skipped.
862- }
863-
864- if (!Func.hasFnAttribute (" amdgpu-no-hostcall-ptr" )) {
865- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_hostcall_buffer" , Offset,
866- Args);
867- } else {
868- Offset += 8 ; // Skipped.
869- }
870-
871- if (!Func.hasFnAttribute (" amdgpu-no-multigrid-sync-arg" )) {
872- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_multigrid_sync_arg" ,
873- Offset, Args);
874- } else {
875- Offset += 8 ; // Skipped.
876- }
877-
878- if (!Func.hasFnAttribute (" amdgpu-no-heap-ptr" ))
879- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_heap_v1" , Offset, Args);
880- else
881- Offset += 8 ; // Skipped.
882-
883- if (!Func.hasFnAttribute (" amdgpu-no-default-queue" )) {
884- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_default_queue" , Offset,
885- Args);
886- } else {
887- Offset += 8 ; // Skipped.
888- }
889-
890- if (!Func.hasFnAttribute (" amdgpu-no-completion-action" )) {
891- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_completion_action" ,
892- Offset, Args);
893- } else {
894- Offset += 8 ; // Skipped.
895- }
896-
897- // Emit argument for hidden dynamic lds size
898- if (MFI.isDynamicLDSUsed ()) {
899- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_dynamic_lds_size" , Offset,
900- Args);
901- } else {
902- Offset += 4 ; // skipped
903- }
904-
905- Offset += 68 ; // Reserved.
906-
907- // hidden_private_base and hidden_shared_base are only when the subtarget has
908- // ApertureRegs.
// NOTE(review): the condition below emits the two bases when
// ST.hasApertureRegs() is false, which reads opposite to the wording of the
// comment above - confirm which is intended.
909- if (!ST.hasApertureRegs ()) {
910- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_private_base" , Offset,
911- Args);
912- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_shared_base" , Offset,
913- Args);
914- } else {
915- Offset += 8 ; // Skipped.
916- }
917-
// hidden_queue_ptr is emitted only when the user SGPR info reports a queue
// pointer; unlike the cases above there is no Offset adjustment otherwise.
918- if (MFI.getUserSGPRInfo ().hasQueuePtr ())
919- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_queue_ptr" , Offset,
920- Args);
921- }
922-
923804void MetadataStreamerMsgPackV6::emitKernelArg (const Argument &Arg,
924805 unsigned &Offset,
925806 msgpack::ArrayDocNode Args,
0 commit comments