@@ -3142,6 +3142,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31423142 OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =
31433143 EnvarConfig.OMPX_AdjustNumTeamsForXteamRedSmallBlockSize ;
31443144 }
3145+ if (!OMPX_XteamBlockSize.isPresent ()) {
3146+ OMPX_XteamBlockSize =
3147+ EnvarConfig.OMPX_XteamBlockSize ;
3148+ }
3149+ if (!OMPX_XTeamReductionOccupancyBasedOpt.isPresent ()) {
3150+ OMPX_XTeamReductionOccupancyBasedOpt =
3151+ EnvarConfig.OMPX_XTeamReductionOccupancyBasedOpt ;
3152+ }
31453153 }
31463154
31473155 ~AMDGPUDeviceTy () {}
@@ -4887,6 +4895,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
48874895 // / done.
48884896 UInt32Envar OMPX_AdjustNumTeamsForSmallBlockSize;
48894897
/// Boolean envar associated with the Xteam-reduction occupancy-based
/// optimization. NOTE(review): the exact effect of this switch is not visible
/// from this declaration -- confirm at the use site. When the envar is not
/// present in the environment, it is seeded from the per-device
/// DeviceEnvarConfigTy field of the same name.
4898+ BoolEnvar OMPX_XTeamReductionOccupancyBasedOpt;
4899+
48904900 // / Envar to allow scaling up the number of teams for Xteam-Reduction,
48914901 // / whenever the blocksize has been reduced from the max. The value 0
48924902 // / indicates that this functionality is disabled. The default value is 1,
@@ -5083,24 +5093,42 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
50835093 OMPX_UseMultipleSdmaEngines; // LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES
50845094 bool
50855095 OMPX_AdjustNumTeamsForXteamRedSmallBlockSize;
5096+ int
5097+ OMPX_XteamBlockSize;
5098+ bool
5099+ OMPX_XTeamReductionOccupancyBasedOpt;
50865100 };
50875101
50885102 static inline const std::unordered_map<std::string, DeviceEnvarConfigTy>
50895103 EnvarConfigs = {{" MI210" , {.OMPX_UseMultipleSdmaEngines = true ,
5104+ .OMPX_XteamBlockSize = 256 ,
5105+ .OMPX_XTeamReductionOccupancyBasedOpt = true ,
50905106 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =0 }},
50915107 {" MI250X" ,{.OMPX_UseMultipleSdmaEngines = true ,
5108+ .OMPX_XteamBlockSize = 256 ,
5109+ .OMPX_XTeamReductionOccupancyBasedOpt = true ,
50925110 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =0 }},
50935111 {" MI250X/MI250" ,{
50945112 .OMPX_UseMultipleSdmaEngines = true ,
5113+ .OMPX_XteamBlockSize = 256 ,
5114+ .OMPX_XTeamReductionOccupancyBasedOpt = true ,
50955115 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =0 }},
50965116 {" MI300A" , {.OMPX_UseMultipleSdmaEngines = false ,
5117+ .OMPX_XteamBlockSize = 512 ,
5118+ .OMPX_XTeamReductionOccupancyBasedOpt = false ,
50975119 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =1 }},
50985120 {" MI300X" , {.OMPX_UseMultipleSdmaEngines = true ,
5121+ .OMPX_XteamBlockSize = 512 ,
5122+ .OMPX_XTeamReductionOccupancyBasedOpt = false ,
50995123 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =1 }},
51005124 {" MI355X" , {.OMPX_UseMultipleSdmaEngines = true ,
5125+ .OMPX_XteamBlockSize = 512 ,
5126+ .OMPX_XTeamReductionOccupancyBasedOpt = false ,
51015127 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =1 }},
51025128 // Default config for unknown devices.
51035129 {" DEFAULT" , {.OMPX_UseMultipleSdmaEngines = true ,
5130+ .OMPX_XteamBlockSize = 512 ,
5131+ .OMPX_XTeamReductionOccupancyBasedOpt = false ,
51045132 .OMPX_AdjustNumTeamsForXteamRedSmallBlockSize =1 }}};
51055133
51065134 const DeviceEnvarConfigTy &getEnvarConfig () const {
0 commit comments