@@ -631,6 +631,13 @@ void MetadataStreamerMsgPackV5::emitVersion() {
631
631
getRootMetadata (" amdhsa.version" ) = Version;
632
632
}
633
633
634
// Emits a single hidden kernel argument for the V5 streamer by forwarding
// DL, ArgTy, Alignment, ArgName, Offset and Args to emitKernelArgImpl.
// HiddenArg and ArgInfo are accepted but not read in this body, so no
// preload information is passed along here.
// NOTE(review): the signature parallels
// MetadataStreamerMsgPackV6::emitHiddenKernelArg, which does use both
// parameters; presumably V6 overrides this method -- confirm in the header.
+ void MetadataStreamerMsgPackV5::emitHiddenKernelArg (
635
+ const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
636
+ unsigned &Offset, msgpack::ArrayDocNode Args,
637
+ KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
638
+ emitKernelArgImpl (DL, ArgTy, Alignment, ArgName, Offset, Args);
639
+ }
640
+
634
641
void MetadataStreamerMsgPackV5::emitHiddenKernelArgs (
635
642
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
636
643
auto &Func = MF.getFunction ();
@@ -649,20 +656,27 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
649
656
auto *Int16Ty = Type::getInt16Ty (Func.getContext ());
650
657
651
658
Offset = alignTo (Offset, ST.getAlignmentForImplicitArgPtr ());
652
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_x" , Offset,
653
- Args);
654
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_y" , Offset,
655
- Args);
656
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_block_count_z" , Offset,
657
- Args);
658
-
659
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_x" , Offset, Args);
660
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_y" , Offset, Args);
661
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_group_size_z" , Offset, Args);
662
-
663
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_x" , Offset, Args);
664
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_y" , Offset, Args);
665
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_remainder_z" , Offset, Args);
659
+ const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo ();
660
+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_x" , Offset,
661
+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_X, &ArgInfo);
662
+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_y" , Offset,
663
+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Y, &ArgInfo);
664
+ emitHiddenKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_z" , Offset,
665
+ Args, KernArgPreload::HIDDEN_BLOCK_COUNT_Z, &ArgInfo);
666
+
667
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_x" , Offset,
668
+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_X, &ArgInfo);
669
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_y" , Offset,
670
+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_Y, &ArgInfo);
671
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_z" , Offset,
672
+ Args, KernArgPreload::HIDDEN_GROUP_SIZE_Z, &ArgInfo);
673
+
674
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_x" , Offset, Args,
675
+ KernArgPreload::HIDDEN_REMAINDER_X, &ArgInfo);
676
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_y" , Offset, Args,
677
+ KernArgPreload::HIDDEN_REMAINDER_Y, &ArgInfo);
678
+ emitHiddenKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_z" , Offset, Args,
679
+ KernArgPreload::HIDDEN_REMAINDER_Z, &ArgInfo);
666
680
667
681
// Reserved for hidden_tool_correlation_id.
668
682
Offset += 8 ;
@@ -768,13 +782,14 @@ void MetadataStreamerMsgPackV6::emitVersion() {
768
782
getRootMetadata (" amdhsa.version" ) = Version;
769
783
}
770
784
771
- void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload (
772
- const DataLayout &DL, Type *ArgTy, Align Alignment,
773
- KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
774
- msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
785
+ void MetadataStreamerMsgPackV6::emitHiddenKernelArg (
786
+ const DataLayout &DL, Type *ArgTy, Align Alignment, StringRef ArgName,
787
+ unsigned &Offset, msgpack::ArrayDocNode Args,
788
+ KernArgPreload::HiddenArg HiddenArg, const AMDGPUFunctionArgInfo *ArgInfo) {
789
+ assert (ArgInfo && HiddenArg != KernArgPreload::END_HIDDEN_ARGS);
775
790
776
791
SmallString<16 > PreloadStr;
777
- const auto *PreloadDesc = ArgInfo. getHiddenArgPreloadDescriptor (HiddenArg);
792
+ const auto *PreloadDesc = ArgInfo-> getHiddenArgPreloadDescriptor (HiddenArg);
778
793
if (PreloadDesc) {
779
794
const auto &Regs = PreloadDesc->Regs ;
780
795
for (unsigned I = 0 ; I < Regs.size (); ++I) {
@@ -786,140 +801,6 @@ void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
786
801
emitKernelArgImpl (DL, ArgTy, Alignment, ArgName, Offset, Args, PreloadStr);
787
802
}
788
803
789
- void MetadataStreamerMsgPackV6::emitHiddenKernelArgs (
790
- const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
791
- auto &Func = MF.getFunction ();
792
- const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
793
-
794
- // No implicit kernel argument is used.
795
- if (ST.getImplicitArgNumBytes (Func) == 0 )
796
- return ;
797
-
798
- const Module *M = Func.getParent ();
799
- auto &DL = M->getDataLayout ();
800
- const SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
801
-
802
- auto *Int64Ty = Type::getInt64Ty (Func.getContext ());
803
- auto *Int32Ty = Type::getInt32Ty (Func.getContext ());
804
- auto *Int16Ty = Type::getInt16Ty (Func.getContext ());
805
-
806
- Offset = alignTo (Offset, ST.getAlignmentForImplicitArgPtr ());
807
-
808
- const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo ();
809
- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
810
- KernArgPreload::HIDDEN_BLOCK_COUNT_X,
811
- " hidden_block_count_x" , Offset, Args, ArgInfo);
812
- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
813
- KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
814
- " hidden_block_count_y" , Offset, Args, ArgInfo);
815
- emitHiddenKernelArgWithPreload (DL, Int32Ty, Align (4 ),
816
- KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
817
- " hidden_block_count_z" , Offset, Args, ArgInfo);
818
-
819
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
820
- KernArgPreload::HIDDEN_GROUP_SIZE_X,
821
- " hidden_group_size_x" , Offset, Args, ArgInfo);
822
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
823
- KernArgPreload::HIDDEN_GROUP_SIZE_Y,
824
- " hidden_group_size_y" , Offset, Args, ArgInfo);
825
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
826
- KernArgPreload::HIDDEN_GROUP_SIZE_Z,
827
- " hidden_group_size_z" , Offset, Args, ArgInfo);
828
-
829
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
830
- KernArgPreload::HIDDEN_REMAINDER_X,
831
- " hidden_remainder_x" , Offset, Args, ArgInfo);
832
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
833
- KernArgPreload::HIDDEN_REMAINDER_Y,
834
- " hidden_remainder_y" , Offset, Args, ArgInfo);
835
- emitHiddenKernelArgWithPreload (DL, Int16Ty, Align (2 ),
836
- KernArgPreload::HIDDEN_REMAINDER_Z,
837
- " hidden_remainder_z" , Offset, Args, ArgInfo);
838
-
839
- // Reserved for hidden_tool_correlation_id.
840
- Offset += 8 ;
841
-
842
- Offset += 8 ; // Reserved.
843
-
844
- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_x" , Offset,
845
- Args);
846
- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_y" , Offset,
847
- Args);
848
- emitKernelArgImpl (DL, Int64Ty, Align (8 ), " hidden_global_offset_z" , Offset,
849
- Args);
850
-
851
- emitKernelArgImpl (DL, Int16Ty, Align (2 ), " hidden_grid_dims" , Offset, Args);
852
-
853
- Offset += 6 ; // Reserved.
854
- auto *Int8PtrTy =
855
- PointerType::get (Func.getContext (), AMDGPUAS::GLOBAL_ADDRESS);
856
-
857
- if (M->getNamedMetadata (" llvm.printf.fmts" )) {
858
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_printf_buffer" , Offset,
859
- Args);
860
- } else {
861
- Offset += 8 ; // Skipped.
862
- }
863
-
864
- if (!Func.hasFnAttribute (" amdgpu-no-hostcall-ptr" )) {
865
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_hostcall_buffer" , Offset,
866
- Args);
867
- } else {
868
- Offset += 8 ; // Skipped.
869
- }
870
-
871
- if (!Func.hasFnAttribute (" amdgpu-no-multigrid-sync-arg" )) {
872
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_multigrid_sync_arg" ,
873
- Offset, Args);
874
- } else {
875
- Offset += 8 ; // Skipped.
876
- }
877
-
878
- if (!Func.hasFnAttribute (" amdgpu-no-heap-ptr" ))
879
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_heap_v1" , Offset, Args);
880
- else
881
- Offset += 8 ; // Skipped.
882
-
883
- if (!Func.hasFnAttribute (" amdgpu-no-default-queue" )) {
884
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_default_queue" , Offset,
885
- Args);
886
- } else {
887
- Offset += 8 ; // Skipped.
888
- }
889
-
890
- if (!Func.hasFnAttribute (" amdgpu-no-completion-action" )) {
891
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_completion_action" ,
892
- Offset, Args);
893
- } else {
894
- Offset += 8 ; // Skipped.
895
- }
896
-
897
- // Emit argument for hidden dynamic lds size
898
- if (MFI.isDynamicLDSUsed ()) {
899
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_dynamic_lds_size" , Offset,
900
- Args);
901
- } else {
902
- Offset += 4 ; // skipped
903
- }
904
-
905
- Offset += 68 ; // Reserved.
906
-
907
- // hidden_private_base and hidden_shared_base are only when the subtarget has
908
- // ApertureRegs.
909
- if (!ST.hasApertureRegs ()) {
910
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_private_base" , Offset,
911
- Args);
912
- emitKernelArgImpl (DL, Int32Ty, Align (4 ), " hidden_shared_base" , Offset,
913
- Args);
914
- } else {
915
- Offset += 8 ; // Skipped.
916
- }
917
-
918
- if (MFI.getUserSGPRInfo ().hasQueuePtr ())
919
- emitKernelArgImpl (DL, Int8PtrTy, Align (8 ), " hidden_queue_ptr" , Offset,
920
- Args);
921
- }
922
-
923
804
void MetadataStreamerMsgPackV6::emitKernelArg (const Argument &Arg,
924
805
unsigned &Offset,
925
806
msgpack::ArrayDocNode Args,
0 commit comments