Skip to content

Commit

Permalink
[CUDA][HIP] Externalize kernels in anonymous name space
Browse files Browse the repository at this point in the history
kernels in anonymous name space needs to have unique name
to avoid duplicate symbols.

Fixes: llvm#54560

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D123353
  • Loading branch information
yxsamliu committed Apr 20, 2022
1 parent 87c1150 commit 63fa543
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 25 deletions.
8 changes: 4 additions & 4 deletions clang/include/clang/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -3289,11 +3289,11 @@ OPT_LIST(V)
/// Return a new OMPTraitInfo object owned by this context.
OMPTraitInfo &getNewOMPTraitInfo();

/// Whether a C++ static variable may be externalized.
bool mayExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel may be externalized.
bool mayExternalize(const Decl *D) const;

/// Whether a C++ static variable should be externalized.
bool shouldExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel should be externalized.
bool shouldExternalize(const Decl *D) const;

StringRef getCUIDHash() const;

Expand Down
18 changes: 10 additions & 8 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11329,7 +11329,7 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
// name between the host and device compilation which is the same for the
// same compilation unit whereas different among different compilation
// units.
if (Context.shouldExternalizeStaticVar(D))
if (Context.shouldExternalize(D))
return GVA_StrongExternal;
}
return L;
Expand Down Expand Up @@ -12278,22 +12278,24 @@ operator<<(const StreamingDiagnostic &DB,
return DB << "a prior #pragma section";
}

bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool ASTContext::mayExternalize(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since it is
// a declaration in IR, therefore cannot have internal linkage.
return IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
// a declaration in IR, therefore cannot have internal linkage. Kernels in
// anonymous name space needs to be externalized to avoid duplicate symbols.
return (IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
(D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
}

bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
bool ASTContext::shouldExternalize(const Decl *D) const {
return mayExternalize(D) &&
(D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDAGlobalAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,13 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
DeviceSideName = std::string(ND->getIdentifier()->getName());

// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
!CGM.getLangOpts().CUID.empty()) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << DeviceSideName;
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
DeviceSideName = std::string(Out.str());
}
return DeviceSideName;
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1382,10 +1382,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
}

// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
return std::string(Out.str());
}

Expand Down Expand Up @@ -1452,8 +1452,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// static device variable depends on whether the variable is referenced by
// a host or device host function. Therefore the mangled name cannot be
// cached.
if (!LangOpts.CUDAIsDevice ||
!getContext().mayExternalizeStaticVar(GD.getDecl())) {
if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) {
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
Expand All @@ -1473,7 +1472,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// directly between host- and device-compilations, the host- and
// device-mangling in host compilation could help catching certain ones.
assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
getLangOpts().CUDAIsDevice ||
getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice ||
(getContext().getAuxTargetInfo() &&
(getContext().getAuxTargetInfo()->getCXXABI() !=
getContext().getTargetInfo().getCXXABI())) ||
Expand Down Expand Up @@ -6778,9 +6777,10 @@ bool CodeGenModule::stopAutoInit() {
return false;
}

void CodeGenModule::printPostfixForExternalizedStaticVar(
llvm::raw_ostream &OS) const {
OS << "__static__" << getContext().getCUIDHash();
void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const {
OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
<< getContext().getCUIDHash();
}

namespace {
Expand Down
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/CodeGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -1459,9 +1459,10 @@ class CodeGenModule : public CodeGenTypeCache {
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();

/// Print the postfix for externalized static variable for single source
/// offloading languages CUDA and HIP.
void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
/// Print the postfix for externalized static variable or kernels for single
/// source offloading languages CUDA and HIP.
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const;

/// Helper functions for generating a NoLoop kernel
/// For a captured statement, get the single For statement, if it exists,
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.dev

// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.host

// RUN: cat %t.dev %t.host | FileCheck %s

#include "Inputs/cuda.h"

// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]

namespace {
__global__ void kernel() {
}
}

void test() {
kernel<<<1, 1>>>();
}

0 comments on commit 63fa543

Please sign in to comment.