Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

M2s #13

Closed
wants to merge 2 commits into from
Closed

M2s #13

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/llvm/ADT/Triple.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ class Triple {
CUDA, // NVIDIA CUDA
NVCL, // NVIDIA OpenCL
AMDHSA, // AMD HSA Runtime
M2S,
PS4,
LastOSType = PS4
};
Expand Down
2 changes: 2 additions & 0 deletions lib/Support/Triple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case CUDA: return "cuda";
case NVCL: return "nvcl";
case AMDHSA: return "amdhsa";
case M2S: return "m2s";
case PS4: return "ps4";
}

Expand Down Expand Up @@ -415,6 +416,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("cuda", Triple::CUDA)
.StartsWith("nvcl", Triple::NVCL)
.StartsWith("amdhsa", Triple::AMDHSA)
.StartsWith("m2s", Triple::M2S)
.StartsWith("ps4", Triple::PS4)
.Default(Triple::UnknownOS);
}
Expand Down
1 change: 1 addition & 0 deletions lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2720,6 +2720,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
NODE_NAME_CASE(M2S_METADATA)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
Expand Down
2 changes: 2 additions & 0 deletions lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ enum NodeType : unsigned {
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
// Multi2Sim node
M2S_METADATA,
LAST_AMDGPU_ISD_NUMBER
};

Expand Down
3 changes: 3 additions & 0 deletions lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
/// Returns true when the OS component of the target triple is Triple::AMDHSA.
bool isAmdHsaOS() const {
  const bool OSIsAmdHsa = (Triple::AMDHSA == TargetTriple.getOS());
  return OSIsAmdHsa;
}
/// Returns true when the OS component of the target triple is Triple::M2S.
bool isM2S() const {
  const bool OSIsM2S = (Triple::M2S == TargetTriple.getOS());
  return OSIsM2S;
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;

unsigned getMaxWavesPerCU() const {
Expand Down
112 changes: 97 additions & 15 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}

// The pointer to the list of arguments is stored in SGPR0, SGPR1
// The pointer to the scratch buffer is stored in SGPR2, SGPR3
// The pointer to the scratch buffer is stored in SGPR2, SGPR3
if (Info->getShaderType() == ShaderType::COMPUTE) {
if (Subtarget->isAmdHsaOS())
Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
Expand Down Expand Up @@ -1023,6 +1023,44 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

// Multi2sim
if (Subtarget->isM2S()) {
switch (IntrinsicID) {
case Intrinsic::r600_read_ngroups_x:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::LOCAL_SIZE_X, false);
case Intrinsic::r600_read_local_size_y:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::LOCAL_SIZE_Y, false);
case Intrinsic::r600_read_local_size_z:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::M2SMetadataOffsets::LOCAL_SIZE_Z, false);
case Intrinsic::AMDGPU_read_workdim:
return getM2SMetadata(DAG, VT, VT, DL, DAG.getEntryNode(),
getImplicitParameterOffset(MFI, GRID_DIM), false);
default:
break;
}
}

switch (IntrinsicID) {
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
Expand Down Expand Up @@ -1051,29 +1089,33 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::r600_read_local_size_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::LOCAL_SIZE_Z, false);

case Intrinsic::AMDGPU_read_workdim:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
getImplicitParameterOffset(MFI, GRID_DIM), false);

case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
return CreateLiveInRegister(
DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops[] = {
Op.getOperand(1),
Expand Down Expand Up @@ -2338,3 +2380,43 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

//===----------------------------------------------------------------------===//
// Multi2Sim related code
//===----------------------------------------------------------------------===//

// Most workitem functions read their value from imm_const_buffer_1 at a fixed
// offset. The query is lowered to an AMDGPUISD::M2S_METADATA node, which is
// selected to:
//   s_buffer_load_dword SGPR, S[8:11], offset
//
// \p VT is the type requested by the caller; the node itself is always built
// with a single i32 result, matching how M2S_METADATA is declared.
// \p MemVT is the type of the value in memory; it sizes the memory operand.
// \p Chain is the chain the copy of the buffer descriptor is attached to.
// \p Offset locates the field inside the buffer (see SI::M2SMetadataOffsets).
// \p Signed is accepted so the signature mirrors LowerParameter; it is
// currently unused.
SDValue SITargetLowering::getM2SMetadata(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                         SDLoc DL, SDValue Chain,
                                         unsigned Offset, bool Signed) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineFunction &MF = DAG.getMachineFunction();

  // The buffer descriptor of imm_const_buffer_1 is preloaded into SGPR[8:11]
  // (that is what getPreloadedValue reports for IMM_CONST_BUFFER_ONE).
  // addLiveIn hands back the virtual register bound to the physical one, so
  // no second lookup through MachineRegisterInfo is needed.
  unsigned DescPhysReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::IMM_CONST_BUFFER_ONE);
  unsigned DescVirtReg = MF.addLiveIn(DescPhysReg, &AMDGPU::SReg_128RegClass);

  SDValue Desc = DAG.getCopyFromReg(Chain, DL, DescVirtReg, MVT::v4i32);
  SDValue ImmOffset = DAG.getConstant(Offset, DL, MVT::i32);
  const SDValue Ops[] = {Desc, ImmOffset};

  // The metadata never changes during a dispatch, hence the invariant load.
  // Size the access from MemVT (the in-memory type) rather than from VT.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(),
      MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
      MemVT.getStoreSize(), 4);

  return DAG.getMemIntrinsicNode(AMDGPUISD::M2S_METADATA, DL,
                                 DAG.getVTList(MVT::i32), Ops, MemVT, MMO);
}

5 changes: 5 additions & 0 deletions lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ class SITargetLowering : public AMDGPUTargetLowering {

bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;

// Multi2Sim
/// Builds an AMDGPUISD::M2S_METADATA node that loads one metadata value from
/// imm_const_buffer_1.
/// \p Chain is the chain used for the copy of the buffer descriptor register,
/// and \p Offset selects the field (see SI::M2SMetadataOffsets).
/// NOTE(review): the parameter list appears to mirror LowerParameter; confirm
/// which of \p VT / \p MemVT / \p Signed the definition actually honours.
SDValue getM2SMetadata(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset, bool Signed) const;

public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);

Expand Down
57 changes: 57 additions & 0 deletions lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,63 @@ enum Offsets {
};

} // End namespace KernelInputOffsets

// Offsets of the metadata fields stored in imm_const_buffer_1.
// Consecutive fields are four apart, i.e. one 32-bit dword per field
// (presumably byte offsets -- TODO confirm against the Multi2Sim ABI).
// RESERVED_* entries only keep the numbering of the layout visible.
enum M2SMetadataOffsets {
  GLOBAL_SIZE_X = 0,
  GLOBAL_SIZE_Y = 4,
  GLOBAL_SIZE_Z = 8,
  WORK_DIM = 12,
  LOCAL_SIZE_X = 16,
  LOCAL_SIZE_Y = 20,
  LOCAL_SIZE_Z = 24,
  RESERVED_0 = 28,
  NGROUPS_X = 32,
  NGROUPS_Y = 36,
  NGROUPS_Z = 40,
  RESERVED_1 = 44,
  OFFSET_TO_PRIVATE_MEM_RING = 48,
  PRIVATE_MEM_ALLOCATED_PER_THREAD = 52,
  RESERVED_2 = 56,
  RESERVED_3 = 60,
  OFFSET_TO_LOCAL_MEM_RING = 64,
  LOCAL_MEM_ALLOCATED_PER_GROUP = 68,
  RESERVED_4 = 72,
  PTR_MATH_LIB = 76,
  IEEE_32_FLOAT_ZERO = 80,
  IEEE_32_FLOAT_HALF = 84,
  IEEE_32_FLOAT_ONE = 88,
  IEEE_32_FLOAT_TWO = 92,
  GLOBAL_OFFSET_X_THREAD_SPAWN = 96,
  // Misspelled original name, kept as an alias so existing users still build.
  GLOBAL_OFFSET_X_THREAD_SPWAN = GLOBAL_OFFSET_X_THREAD_SPAWN,
  GLOBAL_OFFSET_Y_THREAD_SPAWN = 100,
  GLOBAL_OFFSET_Z_THREAD_SPAWN = 104,
  GLOBAL_OFFSET_FLAT_THREAD_SPAWN = 108,
  GROUP_OFFSET_X_THREAD_SPAWN = 112,
  GROUP_OFFSET_Y_THREAD_SPAWN = 116,
  GROUP_OFFSET_Z_THREAD_SPAWN = 120,
  GROUP_OFFSET_FLAT_THREAD_SPAWN = 124,
  OFFSET_DATA_SEG_GLOBAL_MEM = 128,
  OFFSET_PRINT_SUPPORT = 132,
  SIZE_PRINT_BUFFER = 136,
  RESERVED_5 = 140,
  GLOBAL_OFFSET_X = 144,
  GLOBAL_OFFSET_Y = 148,
  GLOBAL_OFFSET_Z = 152,
  RESERVED_6 = 156,
  REGION_WORK_SIZE_X = 160,
  REGION_WORK_SIZE_Y = 164,
  REGION_WORK_SIZE_Z = 168,
  RESERVED_7 = 172,
  GLOBAL_SIZE_X_DIV_REGION_WORK_SIZE_X = 176,
  GLOBAL_SIZE_Y_DIV_REGION_WORK_SIZE_Y = 180,
  GLOBAL_SIZE_Z_DIV_REGION_WORK_SIZE_Z = 184,
  RESERVED_8 = 188,
  REGION_WORK_SIZE_X_DIV_LOCAL_SIZE_X = 192,
  REGION_WORK_SIZE_Y_DIV_LOCAL_SIZE_Y = 196,
  REGION_WORK_SIZE_Z_DIV_LOCAL_SIZE_Z = 200,
  OFFSET_SW_GWS = 204
};

} // End namespace SI

} // End namespace llvm
Expand Down
6 changes: 6 additions & 0 deletions lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
[SDNPMayLoad, SDNPMemOperand]
>;

// Multi2Sim node: produces one i32 loaded from a constant buffer.
// Operands: the v4i32 buffer resource descriptor and an i32 offset.
// The node is created with getMemIntrinsicNode and carries a
// MachineMemOperand, so SDNPMemOperand is listed (as for SIload_constant) to
// have that memory operand transferred to the selected instruction.
def m2s_metadata : SDNode<"AMDGPUISD::M2S_METADATA",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
SDTypeProfile<0, 13,
[SDTCisVT<0, v4i32>, // rsrc(SGPR)
Expand Down
9 changes: 9 additions & 0 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2077,6 +2077,15 @@ def : Pat <
$src0, $src1, $src2, $src3)
>;

// Multi2Sim
//
// Selects an m2s_metadata node (used by most workitem functions to read
// metadata from imm_const_buffer_1) to S_BUFFER_LOAD_DWORD_IMM.
// $sbase is the v4i32 buffer descriptor; $offset is passed through
// as_dword_i32imm before becoming the instruction's immediate.
// NOTE(review): as_dword_i32imm presumably rescales the offset to dword
// units -- confirm against its definition.
def : Pat <
(m2s_metadata v4i32:$sbase, i32:$offset),
(i32 (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)))
>;


//===----------------------------------------------------------------------===//
// SMRD Patterns
//===----------------------------------------------------------------------===//
Expand Down
7 changes: 7 additions & 0 deletions lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,13 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
return AMDGPU::VGPR1;
case SIRegisterInfo::TIDIG_Z:
return AMDGPU::VGPR2;
// Multi2Sim
case SIRegisterInfo::PTR_UAV_TABLE:
return AMDGPU::SGPR0_SGPR1;
case SIRegisterInfo::IMM_CONST_BUFFER_ZERO:
return AMDGPU::SGPR4_SGPR5_SGPR6_SGPR7;
case SIRegisterInfo::IMM_CONST_BUFFER_ONE:
return AMDGPU::SGPR8_SGPR9_SGPR10_SGPR11;
}
llvm_unreachable("unexpected preloaded value type");
}
Expand Down
5 changes: 4 additions & 1 deletion lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
INPUT_PTR,
TIDIG_X,
TIDIG_Y,
TIDIG_Z
TIDIG_Z,
PTR_UAV_TABLE,
IMM_CONST_BUFFER_ZERO,
IMM_CONST_BUFFER_ONE
};

/// \brief Returns the physical register that \p Value is stored in.
Expand Down