Skip to content

[NVPTX] Add support for Shared Cluster Memory address space [1/2] #135444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,

if (TargetPointerWidth == 32)
resetDataLayout(
"e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
"e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
else if (Opts.NVPTXUseShortPointers)
resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-i128:128-v16:"
"16-v32:32-n16:32:64");
resetDataLayout(
"e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
"16-v32:32-n16:32:64");
else
resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/target-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@

// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=NVPTX
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
// NVPTX: target datalayout = "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"

// RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=NVPTX64
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/NVPTXAddrSpace.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum AddressSpace : unsigned {
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
ADDRESS_SPACE_TENSOR = 6,
ADDRESS_SPACE_SHARED_CLUSTER = 7,

ADDRESS_SPACE_PARAM = 101,
};
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
case NVPTX::AddressSpace::Global:
case NVPTX::AddressSpace::Const:
case NVPTX::AddressSpace::Shared:
case NVPTX::AddressSpace::SharedCluster:
case NVPTX::AddressSpace::Param:
case NVPTX::AddressSpace::Local:
O << "." << A;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ enum AddressSpace : AddressSpaceUnderlyingType {
Shared = 3,
Const = 4,
Local = 5,
SharedCluster = 7,

// NVPTX Backend Private:
Param = 101
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
// TODO: cvta.param is not yet supported. We need to change aliasing
// rules once it is added.

// Distributed shared memory aliases with shared memory.
if (((AS1 == ADDRESS_SPACE_SHARED) &&
(AS2 == ADDRESS_SPACE_SHARED_CLUSTER)) ||
((AS1 == ADDRESS_SPACE_SHARED_CLUSTER) && (AS2 == ADDRESS_SPACE_SHARED)))
return AliasResult::MayAlias;

return (AS1 == AS2 ? AliasResult::MayAlias : AliasResult::NoAlias);
}

Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,8 @@ static std::optional<unsigned> convertAS(unsigned AS) {
return NVPTX::AddressSpace::Global;
case llvm::ADDRESS_SPACE_SHARED:
return NVPTX::AddressSpace::Shared;
case llvm::ADDRESS_SPACE_SHARED_CLUSTER:
return NVPTX::AddressSpace::SharedCluster;
case llvm::ADDRESS_SPACE_GENERIC:
return NVPTX::AddressSpace::Generic;
case llvm::ADDRESS_SPACE_PARAM:
Expand Down Expand Up @@ -658,7 +660,8 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
bool AddrGenericOrGlobalOrShared =
(CodeAddrSpace == NVPTX::AddressSpace::Generic ||
CodeAddrSpace == NVPTX::AddressSpace::Global ||
CodeAddrSpace == NVPTX::AddressSpace::Shared);
CodeAddrSpace == NVPTX::AddressSpace::Shared ||
CodeAddrSpace == NVPTX::AddressSpace::SharedCluster);
if (!AddrGenericOrGlobalOrShared)
return NVPTX::Ordering::NotAtomic;

Expand Down Expand Up @@ -979,6 +982,12 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
case ADDRESS_SPACE_SHARED:
Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
break;
case ADDRESS_SPACE_SHARED_CLUSTER:
if (!TM.is64Bit())
report_fatal_error(
"Shared cluster address space is only supported in 64-bit mode");
Opc = NVPTX::cvta_shared_cluster_64;
break;
case ADDRESS_SPACE_CONST:
Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
break;
Expand All @@ -1004,6 +1013,12 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
case ADDRESS_SPACE_SHARED:
Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
break;
case ADDRESS_SPACE_SHARED_CLUSTER:
if (!TM.is64Bit())
report_fatal_error(
"Shared cluster address space is only supported in 64-bit mode");
Opc = NVPTX::cvta_to_shared_cluster_64;
break;
case ADDRESS_SPACE_CONST:
Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
break;
Expand Down
21 changes: 20 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3043,8 +3043,27 @@ SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op,
unsigned SrcAS = N->getSrcAddressSpace();
unsigned DestAS = N->getDestAddressSpace();
if (SrcAS != llvm::ADDRESS_SPACE_GENERIC &&
DestAS != llvm::ADDRESS_SPACE_GENERIC)
DestAS != llvm::ADDRESS_SPACE_GENERIC) {
// Shared and SharedCluster can be converted to each other through generic
// space
if ((SrcAS == llvm::ADDRESS_SPACE_SHARED &&
DestAS == llvm::ADDRESS_SPACE_SHARED_CLUSTER) ||
(SrcAS == llvm::ADDRESS_SPACE_SHARED_CLUSTER &&
DestAS == llvm::ADDRESS_SPACE_SHARED)) {
SDLoc DL(Op.getNode());
const MVT GenerictVT =
getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_GENERIC);
SDValue GenericConversion = DAG.getAddrSpaceCast(
DL, GenerictVT, Op.getOperand(0), SrcAS, ADDRESS_SPACE_GENERIC);
SDValue SharedClusterConversion =
DAG.getAddrSpaceCast(DL, Op.getValueType(), GenericConversion,
ADDRESS_SPACE_GENERIC, DestAS);
return SharedClusterConversion;
}

return DAG.getUNDEF(Op.getValueType());
}

return Op;
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasClusters : Predicate<"Subtarget->hasClusters()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
Expand Down
35 changes: 24 additions & 11 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def AS_match {
code shared = [{
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}];
code shared_cluster = [{
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED_CLUSTER);
}];
code global = [{
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}];
Expand Down Expand Up @@ -2039,10 +2042,11 @@ class ATOMIC_GLOBAL_CHK <dag frag>
: PatFrag<!setdagop(frag, ops), frag, AS_match.global>;
// Address-space-checked wrappers for atomic pattern fragments. Each class
// takes a pattern `frag`, rebuilds it with `ops` as the DAG operator to form
// the PatFrag operand list, and attaches one AS_match predicate so the
// fragment only matches when that predicate accepts the node.

// Restricts `frag` to nodes accepted by AS_match.shared.
class ATOMIC_SHARED_CHK <dag frag>
: PatFrag<!setdagop(frag, ops), frag, AS_match.shared>;
// Restricts `frag` to nodes accepted by AS_match.shared_cluster.
class ATOMIC_SHARED_CLUSTER_CHK <dag frag>
: PatFrag<!setdagop(frag, ops), frag, AS_match.shared_cluster>;
// Restricts `frag` to nodes accepted by AS_match.generic.
class ATOMIC_GENERIC_CHK <dag frag>
: PatFrag<!setdagop(frag, ops), frag, AS_match.generic>;


multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
SDPatternOperator op, list<Predicate> preds> {
defvar asm_str = "atom" # sem_str # as_str # "." # op_str # " \t$dst, [$addr], $b;";
Expand Down Expand Up @@ -2094,13 +2098,15 @@ multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, lis
defvar frag_pat = (frag node:$a, node:$b);
defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
defm _S : F_ATOMIC_2<t, "", ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
defm _S_C : F_ATOMIC_2<t, "", ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
}

// Instantiates the three-operand atomic multiclass F_ATOMIC_3 once per
// address-space variant for register type `t`:
//   _G   - ".global" suffix, fragment guarded by ATOMIC_GLOBAL_CHK
//   _S   - ".shared" suffix, fragment guarded by ATOMIC_SHARED_CHK
//   _S_C - ".shared::cluster" suffix, fragment guarded by
//          ATOMIC_SHARED_CLUSTER_CHK
//   _GEN - empty address-space suffix, fragment guarded by ATOMIC_GENERIC_CHK
// `sem_str` and `op_str` are forwarded unchanged to F_ATOMIC_3, and `preds`
// (default: empty list) is forwarded to every variant.
multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string sem_str, string op_str, list<Predicate> preds = []> {
// Three-operand pattern shared by all variants below.
defvar frag_pat = (frag node:$a, node:$b, node:$c);
defm _G : F_ATOMIC_3<t, sem_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
defm _S : F_ATOMIC_3<t, sem_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
// The shared::cluster variant prepends hasClusters to the caller's predicates.
defm _S_C : F_ATOMIC_3<t, sem_str, ".shared::cluster", op_str, ATOMIC_SHARED_CLUSTER_CHK<frag_pat>, !listconcat([hasClusters], preds)>;
defm _GEN : F_ATOMIC_3<t, sem_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
}

Expand Down Expand Up @@ -2381,25 +2387,32 @@ def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;


multiclass NG_TO_G<string Str> {
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"cvta." # Str # ".u32 \t$result, $src;", []>;
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"cvta." # Str # ".u64 \t$result, $src;", []>;
multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
if Supports32 then
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"cvta." # Str # ".u32 \t$result, $src;", []>, Requires<Preds>;

def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"cvta." # Str # ".u64 \t$result, $src;", []>, Requires<Preds>;
}

multiclass G_TO_NG<string Str> {
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"cvta.to." # Str # ".u32 \t$result, $src;", []>;
def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"cvta.to." # Str # ".u64 \t$result, $src;", []>;
multiclass G_TO_NG<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
if Supports32 then
def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"cvta.to." # Str # ".u32 \t$result, $src;", []>, Requires<Preds>;

def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"cvta.to." # Str # ".u64 \t$result, $src;", []>, Requires<Preds>;
}

foreach space = ["local", "shared", "global", "const", "param"] in {
defm cvta_#space : NG_TO_G<space>;
defm cvta_to_#space : G_TO_NG<space>;
}

defm cvta_shared_cluster : NG_TO_G<"shared::cluster", false, [hasClusters]>;
defm cvta_to_shared_cluster : G_TO_NG<"shared::cluster", false, [hasClusters]>;

def : Pat<(int_nvvm_ptr_param_to_gen i32:$src),
(cvta_param $src)>;

Expand Down
18 changes: 11 additions & 7 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,15 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
std::string Ret = "e";

if (!is64Bit)
Ret += "-p:32:32";
else if (UseShortPointers)
Ret += "-p3:32:32-p4:32:32-p5:32:32";

// Tensor Memory (addrspace:6) is always 32-bits.
Ret += "-p6:32:32";
// Distributed Shared Memory (addrspace:7) follows shared memory
// (addrspace:3).
if (!is64Bit)
Ret += "-p:32:32-p6:32:32-p7:32:32";
else if (UseShortPointers) {
Ret += "-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32";
} else
Ret += "-p6:32:32";

Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";

Expand Down Expand Up @@ -280,8 +282,10 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
case Intrinsic::nvvm_isspacep_local:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
case Intrinsic::nvvm_isspacep_shared:
case Intrinsic::nvvm_isspacep_shared_cluster:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
case Intrinsic::nvvm_isspacep_shared_cluster:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically this is conservative, since nvvm_isspacep_shared_cluster == true implies that the pointer is also under ADDRESS_SPACE_SHARED. However, the current interface only allows returning one address space, so that will need to be updated. I'm planning on improving InferAddrSpaces to also detect address spaces from mapa/mapa_shared_cluster, so I'll update this part as well as part of that work.

return std::make_pair(II->getArgOperand(0),
llvm::ADDRESS_SPACE_SHARED_CLUSTER);
default:
break;
}
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,12 +424,13 @@ static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
case Intrinsic::nvvm_isspacep_local:
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
case Intrinsic::nvvm_isspacep_shared:
// If shared cluster this can't be evaluated at compile time.
if (AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
return std::nullopt;
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
case Intrinsic::nvvm_isspacep_shared_cluster:
// We can't tell shared from shared_cluster at compile time from AS alone,
// but it can't be either if AS is not shared.
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
: std::optional{false};
return AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER ||
AS == NVPTXAS::ADDRESS_SPACE_SHARED;
case Intrinsic::nvvm_isspacep_const:
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
default:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ inline std::string AddressSpaceToString(AddressSpace A) {
return "const";
case AddressSpace::Shared:
return "shared";
case AddressSpace::SharedCluster:
return "shared::cluster";
case AddressSpace::Param:
return "param";
case AddressSpace::Local:
Expand Down
Loading
Loading