@@ -993,8 +993,14 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
993993 return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
994994 }
995995
996- return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
997- AMDGPU::SI_TCRETURN;
996+ if (CallerF.getFunction ().getCallingConv () ==
997+ CallingConv::AMDGPU_Gfx_WholeWave)
998+ return AMDGPU::SI_TCRETURN_GFX_WholeWave;
999+
1000+ if (CC == CallingConv::AMDGPU_Gfx || CC == CallingConv::AMDGPU_Gfx_WholeWave)
1001+ return AMDGPU::SI_TCRETURN_GFX;
1002+
1003+ return AMDGPU::SI_TCRETURN;
9981004}
9991005
10001006// Add operands to call instruction to track the callee.
@@ -1273,6 +1279,13 @@ bool AMDGPUCallLowering::lowerTailCall(
12731279 unsigned Opc = getCallOpcode (MF, Info.Callee .isReg (), /* IsTailCall*/ true ,
12741280 ST.isWave32 (), CalleeCC, IsDynamicVGPRChainCall);
12751281 auto MIB = MIRBuilder.buildInstrNoInsert (Opc);
1282+
1283+ if (FuncInfo->isWholeWaveFunction ())
1284+ addOriginalExecToReturn (MF, MIB);
1285+
1286+ // Keep track of the index of the next operand to be added to the call
1287+ unsigned CalleeIdx = MIB->getNumOperands ();
1288+
12761289 if (!addCallTargetOperands (MIB, MIRBuilder, Info, IsDynamicVGPRChainCall))
12771290 return false ;
12781291
@@ -1390,7 +1403,7 @@ bool AMDGPUCallLowering::lowerTailCall(
13901403 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
13911404 // sequence start and end here.
13921405 if (!IsSibCall) {
1393- MIB->getOperand (1 ).setImm (FPDiff);
1406+ MIB->getOperand (CalleeIdx + 1 ).setImm (FPDiff);
13941407 CallSeqStart.addImm (NumBytes).addImm (0 );
13951408 // End the call sequence *before* emitting the call. Normally, we would
13961409 // tidy the frame up after the call. However, here, we've laid out the
@@ -1402,16 +1415,24 @@ bool AMDGPUCallLowering::lowerTailCall(
14021415 // Now we can add the actual call instruction to the correct basic block.
14031416 MIRBuilder.insertInstr (MIB);
14041417
1418+ // If this is a whole wave tail call, we need to constrain the register for
1419+ // the original EXEC.
1420+ if (MIB->getOpcode () == AMDGPU::SI_TCRETURN_GFX_WholeWave) {
1421+ MIB->getOperand (0 ).setReg (
1422+ constrainOperandRegClass (MF, *TRI, MRI, *TII, *ST.getRegBankInfo (),
1423+ *MIB, MIB->getDesc (), MIB->getOperand (0 ), 0 ));
1424+ }
1425+
14051426 // If Callee is a reg, since it is used by a target specific
14061427 // instruction, it must have a register class matching the
14071428 // constraint of that instruction.
14081429
14091430 // FIXME: We should define regbankselectable call instructions to handle
14101431 // divergent call targets.
1411- if (MIB->getOperand (0 ).isReg ()) {
1412- MIB->getOperand (0 ).setReg (
1413- constrainOperandRegClass ( MF, *TRI, MRI, *TII, *ST.getRegBankInfo (),
1414- * MIB, MIB-> getDesc (), MIB-> getOperand (0 ), 0 ));
1432+ if (MIB->getOperand (CalleeIdx ).isReg ()) {
1433+ MIB->getOperand (CalleeIdx ).setReg ( constrainOperandRegClass (
1434+ MF, *TRI, MRI, *TII, *ST.getRegBankInfo (), *MIB, MIB-> getDesc (),
1435+ MIB-> getOperand (CalleeIdx ), CalleeIdx ));
14151436 }
14161437
14171438 MF.getFrameInfo ().setHasTailCall ();
0 commit comments