Skip to content

Commit c28c683

Browse files
author
Artem Gindinson
committed
Merge remote-tracking branch 'intel/sycl' into nvptx-macro-init
Signed-off-by: Artem Gindinson <artem.gindinson@intel.com>
2 parents f8afe38 + ec61222 commit c28c683

36 files changed

+5870
-106
lines changed

buildbot/configure.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def do_configure(args):
2626
libclc_targets_to_build = ''
2727
sycl_build_pi_cuda = 'OFF'
2828
sycl_build_pi_esimd_cpu = 'ON'
29+
sycl_build_pi_rocm = 'OFF'
2930
sycl_werror = 'ON'
3031
llvm_enable_assertions = 'ON'
3132
llvm_enable_doxygen = 'OFF'
@@ -47,6 +48,14 @@ def do_configure(args):
4748

4849
if args.disable_esimd_cpu:
4950
sycl_build_pi_esimd_cpu = 'OFF'
51+
52+
if args.rocm:
53+
llvm_targets_to_build += ';AMDGPU'
54+
# TODO libclc should be added once,
55+
# TODO when we build DPC++ with both CUDA and ROCM support
56+
llvm_enable_projects += ';libclc'
57+
libclc_targets_to_build = 'amdgcn--;amdgcn--amdhsa'
58+
sycl_build_pi_rocm = 'ON'
5059

5160
if args.no_werror:
5261
sycl_werror = 'OFF'
@@ -82,6 +91,7 @@ def do_configure(args):
8291
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
8392
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
8493
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
94+
"-DSYCL_BUILD_PI_ROCM={}".format(sycl_build_pi_rocm),
8595
"-DLLVM_BUILD_TOOLS=ON",
8696
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
8797
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
@@ -151,6 +161,7 @@ def main():
151161
parser.add_argument("-t", "--build-type",
152162
metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release")
153163
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
164+
parser.add_argument("--rocm", action='store_true', help="swith from OpenCL to ROCM")
154165
parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86")
155166
parser.add_argument("--disable-esimd-cpu", action='store_true', help="build without ESIMD_CPU support")
156167
parser.add_argument("--no-assertions", action='store_true', help="build without assertions")

clang/include/clang/Analysis/CallGraph.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
namespace clang {
3131

32+
class ASTContext;
3233
class CallGraphNode;
3334
class Decl;
3435
class DeclContext;
@@ -51,6 +52,12 @@ class CallGraph : public RecursiveASTVisitor<CallGraph> {
5152
/// This is a virtual root node that has edges to all the functions.
5253
CallGraphNode *Root;
5354

55+
/// A setting to determine whether this should include calls that are done in
56+
/// a constant expression's context. This DOES require the ASTContext object
57+
/// for constexpr-if, so setting it requires a valid ASTContext.
58+
bool ShouldSkipConstexpr = false;
59+
ASTContext *Ctx;
60+
5461
public:
5562
CallGraph();
5663
~CallGraph();
@@ -66,7 +73,7 @@ class CallGraph : public RecursiveASTVisitor<CallGraph> {
6673
/// Determine if a declaration should be included in the graph.
6774
static bool includeInGraph(const Decl *D);
6875

69-
/// Determine if a declaration should be included in the graph for the
76+
/// Determine if a declaration should be included in the graph for the
7077
/// purposes of being a callee. This is similar to includeInGraph except
7178
/// it permits declarations, not just definitions.
7279
static bool includeCalleeInGraph(const Decl *D);
@@ -138,6 +145,15 @@ class CallGraph : public RecursiveASTVisitor<CallGraph> {
138145
bool shouldWalkTypesOfTypeLocs() const { return false; }
139146
bool shouldVisitTemplateInstantiations() const { return true; }
140147
bool shouldVisitImplicitCode() const { return true; }
148+
bool shouldSkipConstantExpressions() const { return ShouldSkipConstexpr; }
149+
void setSkipConstantExpressions(ASTContext &Context) {
150+
Ctx = &Context;
151+
ShouldSkipConstexpr = true;
152+
}
153+
ASTContext &getASTContext() {
154+
assert(Ctx);
155+
return *Ctx;
156+
}
141157

142158
private:
143159
/// Add the given declaration to the call graph.

clang/lib/Analysis/CallGraph.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "clang/Analysis/CallGraph.h"
14+
#include "clang/AST/ASTContext.h"
1415
#include "clang/AST/Decl.h"
1516
#include "clang/AST/DeclBase.h"
1617
#include "clang/AST/DeclObjC.h"
@@ -136,6 +137,37 @@ class CGBuilder : public StmtVisitor<CGBuilder> {
136137
}
137138
}
138139

140+
void VisitIfStmt(IfStmt *If) {
141+
if (G->shouldSkipConstantExpressions()) {
142+
if (llvm::Optional<Stmt *> ActiveStmt =
143+
If->getNondiscardedCase(G->getASTContext())) {
144+
if (*ActiveStmt)
145+
this->Visit(*ActiveStmt);
146+
return;
147+
}
148+
}
149+
150+
StmtVisitor::VisitIfStmt(If);
151+
}
152+
153+
void VisitDeclStmt(DeclStmt *DS) {
154+
if (G->shouldSkipConstantExpressions()) {
155+
auto IsConstexprVarDecl = [](Decl *D) {
156+
if (const auto *VD = dyn_cast<VarDecl>(D))
157+
return VD->isConstexpr();
158+
return false;
159+
};
160+
if (llvm::any_of(DS->decls(), IsConstexprVarDecl)) {
161+
assert(llvm::all_of(DS->decls(), IsConstexprVarDecl) &&
162+
"Situation where a decl-group would be a mix of decl types, or "
163+
"constexpr and not?");
164+
return;
165+
}
166+
}
167+
168+
StmtVisitor::VisitDeclStmt(DS);
169+
}
170+
139171
void VisitChildren(Stmt *S) {
140172
for (Stmt *SubStmt : S->children())
141173
if (SubStmt)

clang/lib/CodeGen/CGCall.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
760760
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
761761
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
762762
// with a `spir_kernel` calling convention are ignored otherwise.
763-
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
763+
if (CC == llvm::CallingConv::SPIR_KERNEL &&
764+
(CGM.getTriple().isNVPTX() || CGM.getTriple().isAMDGCN()) &&
764765
getContext().getLangOpts().SYCLIsDevice) {
765766
CC = llvm::CallingConv::C;
766767
}

clang/lib/Driver/Driver.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,11 @@ static bool isValidSYCLTriple(llvm::Triple T) {
700700
// NVPTX is valid for SYCL.
701701
if (T.isNVPTX())
702702
return true;
703+
704+
// AMDGCN is valid for SYCL
705+
if (T.isAMDGCN())
706+
return true;
707+
703708
// Check for invalid SYCL device triple values.
704709
// Non-SPIR arch.
705710
if (!T.isSPIR())
@@ -3898,6 +3903,21 @@ class OffloadingActionBuilder final {
38983903
return BA;
38993904
}
39003905

3906+
Action *finalizeAMDGCNDependences(Action *Input, const llvm::Triple &TT) {
3907+
auto *BA = C.getDriver().ConstructPhaseAction(
3908+
C, Args, phases::Backend, Input, AssociatedOffloadKind);
3909+
3910+
auto *AA = C.getDriver().ConstructPhaseAction(C, Args, phases::Assemble,
3911+
BA, AssociatedOffloadKind);
3912+
3913+
ActionList AL = {AA};
3914+
Action *LinkAction = C.MakeAction<LinkJobAction>(AL, types::TY_Image);
3915+
ActionList HIPActions = {LinkAction};
3916+
Action *HIPFatBinary =
3917+
C.MakeAction<LinkJobAction>(HIPActions, types::TY_HIP_FATBIN);
3918+
return HIPFatBinary;
3919+
}
3920+
39013921
public:
39023922
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
39033923
const Driver::InputList &Inputs)
@@ -4294,6 +4314,7 @@ class OffloadingActionBuilder final {
42944314
ActionList LinkObjects;
42954315
auto TT = SYCLTripleList[I];
42964316
auto isNVPTX = (*TC)->getTriple().isNVPTX();
4317+
auto isAMDGCN = (*TC)->getTriple().isAMDGCN();
42974318
bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga ||
42984319
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen ||
42994320
TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64;
@@ -4391,7 +4412,7 @@ class OffloadingActionBuilder final {
43914412
// When spv online link is supported by all backends, the fallback
43924413
// device libraries are only needed when current toolchain is using
43934414
// AOT compilation.
4394-
if (!isNVPTX) {
4415+
if (!isNVPTX && !isAMDGCN) {
43954416
SYCLDeviceLibLinked = addSYCLDeviceLibs(
43964417
*TC, FullLinkObjects, true,
43974418
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment());
@@ -4405,7 +4426,7 @@ class OffloadingActionBuilder final {
44054426
FullDeviceLinkAction = DeviceLinkAction;
44064427
// setup some flags upfront
44074428

4408-
if (isNVPTX && DeviceCodeSplit) {
4429+
if ((isNVPTX || isAMDGCN) && DeviceCodeSplit) {
44094430
// TODO Temporary limitation, need to support code splitting for PTX
44104431
const Driver &D = C.getDriver();
44114432
const std::string &OptName =
@@ -4417,14 +4438,14 @@ class OffloadingActionBuilder final {
44174438
}
44184439
// reflects whether current target is ahead-of-time and can't support
44194440
// runtime setting of specialization constants
4420-
bool isAOT = isNVPTX || isSpirvAOT;
4441+
bool isAOT = isNVPTX || isAMDGCN || isSpirvAOT;
44214442
// TODO support device code split for NVPTX target
44224443

44234444
ActionList WrapperInputs;
44244445
// post link is not optional - even if not splitting, always need to
44254446
// process specialization constants
44264447
types::ID PostLinkOutType =
4427-
isNVPTX ? types::TY_LLVM_BC : types::TY_Tempfiletable;
4448+
isNVPTX || isAMDGCN ? types::TY_LLVM_BC : types::TY_Tempfiletable;
44284449
auto *PostLinkAction = C.MakeAction<SYCLPostLinkJobAction>(
44294450
FullDeviceLinkAction, PostLinkOutType);
44304451
PostLinkAction->setRTSetsSpecConstants(!isAOT);
@@ -4433,6 +4454,10 @@ class OffloadingActionBuilder final {
44334454
Action *FinAction =
44344455
finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple());
44354456
WrapperInputs.push_back(FinAction);
4457+
} else if (isAMDGCN) {
4458+
Action *FinAction =
4459+
finalizeAMDGCNDependences(PostLinkAction, (*TC)->getTriple());
4460+
WrapperInputs.push_back(FinAction);
44364461
} else {
44374462
// For SPIRV-based targets - translate to SPIRV then optionally
44384463
// compile ahead-of-time to native architecture
@@ -7276,7 +7301,7 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
72767301
break;
72777302
case Action::OFK_HIP:
72787303
TC = std::make_unique<toolchains::HIPToolChain>(
7279-
*this, Target, HostTC, Args);
7304+
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
72807305
break;
72817306
case Action::OFK_OpenMP:
72827307
// omp + nvptx
@@ -7295,6 +7320,10 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(const ArgList &Args,
72957320
TC = std::make_unique<toolchains::CudaToolChain>(
72967321
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
72977322
break;
7323+
case llvm::Triple::amdgcn:
7324+
TC = std::make_unique<toolchains::HIPToolChain>(
7325+
*this, Target, HostTC, Args, TargetDeviceOffloadKind);
7326+
break;
72987327
default:
72997328
break;
73007329
}

clang/lib/Driver/ToolChain.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1169,7 +1169,9 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
11691169
// matches the current toolchain triple. If it is not present
11701170
// at all, target and host share a toolchain.
11711171
if (A->getOption().matches(options::OPT_m_Group)) {
1172-
if (SameTripleAsHost)
1172+
// AMD GPU is a special case, as -mcpu is required for the device
1173+
// compilation.
1174+
if (SameTripleAsHost || getTriple().getArch() == llvm::Triple::amdgcn)
11731175
DAL->append(A);
11741176
else
11751177
Modified = true;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8634,7 +8634,8 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
86348634
",+SPV_INTEL_arbitrary_precision_floating_point"
86358635
",+SPV_INTEL_variable_length_array,+SPV_INTEL_fp_fast_math_mode"
86368636
",+SPV_INTEL_fpga_cluster_attributes,+SPV_INTEL_loop_fuse"
8637-
",+SPV_INTEL_long_constant_composite";
8637+
",+SPV_INTEL_long_constant_composite"
8638+
",+SPV_INTEL_fpga_invocation_pipelining_attributes";
86388639
ExtArg = ExtArg + DefaultExtArg + INTELExtArg;
86398640
if (getToolChain().getTriple().getSubArch() ==
86408641
llvm::Triple::SPIRSubArch_fpga) {

clang/lib/Driver/ToolChains/HIP.cpp

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
118118
// for backward compatibility. For code object version 4 and greater, the
119119
// offload kind in bundle ID is 'hipv4'.
120120
std::string OffloadKind = "hip";
121-
if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
121+
if (haveAMDGPUCodeObjectVersionArgument(C.getDriver(), Args) &&
122+
getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
122123
OffloadKind = OffloadKind + "v4";
123124
for (const auto &II : Inputs) {
124125
const auto* A = II.getAction();
@@ -231,8 +232,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
231232
}
232233

233234
HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
234-
const ToolChain &HostTC, const ArgList &Args)
235-
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
235+
const ToolChain &HostTC, const ArgList &Args,
236+
const Action::OffloadKind OK)
237+
: ROCMToolChain(D, Triple, Args), HostTC(HostTC), OK(OK) {
236238
// Lookup binaries into the driver directory, this is used to
237239
// discover the clang-offload-bundler executable.
238240
getProgramPaths().push_back(getDriver().Dir);
@@ -244,8 +246,11 @@ void HIPToolChain::addClangTargetOptions(
244246
Action::OffloadKind DeviceOffloadingKind) const {
245247
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
246248

247-
assert(DeviceOffloadingKind == Action::OFK_HIP &&
248-
"Only HIP offloading kinds are supported for GPUs.");
249+
assert((DeviceOffloadingKind == Action::OFK_HIP ||
250+
DeviceOffloadingKind == Action::OFK_SYCL) &&
251+
"Only HIP and SYCL offloading kinds are supported for GPUs.");
252+
253+
StringRef GpuArch = getGPUArch(DriverArgs);
249254

250255
CC1Args.push_back("-fcuda-is-device");
251256

@@ -275,6 +280,57 @@ void HIPToolChain::addClangTargetOptions(
275280
CC1Args.push_back("-fapply-global-visibility-to-externs");
276281
}
277282

283+
if (DeviceOffloadingKind == Action::OFK_SYCL) {
284+
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
285+
CC1Args);
286+
}
287+
288+
auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv,
289+
options::OPT_fsycl_device_only);
290+
if (DeviceOffloadingKind == Action::OFK_SYCL && !NoLibSpirv) {
291+
std::string LibSpirvFile;
292+
293+
if (DriverArgs.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) {
294+
auto ProvidedPath =
295+
DriverArgs
296+
.getLastArgValue(
297+
clang::driver::options::OPT_fsycl_libspirv_path_EQ)
298+
.str();
299+
if (llvm::sys::fs::exists(ProvidedPath))
300+
LibSpirvFile = ProvidedPath;
301+
} else {
302+
SmallVector<StringRef, 8> LibraryPaths;
303+
304+
// Expected path w/out install.
305+
SmallString<256> WithoutInstallPath(getDriver().ResourceDir);
306+
llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
307+
LibraryPaths.emplace_back(WithoutInstallPath.c_str());
308+
309+
// Expected path w/ install.
310+
SmallString<256> WithInstallPath(getDriver().ResourceDir);
311+
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
312+
LibraryPaths.emplace_back(WithInstallPath.c_str());
313+
314+
std::string LibSpirvTargetName = "libspirv-amdgcn--amdhsa.bc";
315+
for (StringRef LibraryPath : LibraryPaths) {
316+
SmallString<128> LibSpirvTargetFile(LibraryPath);
317+
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
318+
if (llvm::sys::fs::exists(LibSpirvTargetFile)) {
319+
LibSpirvFile = std::string(LibSpirvTargetFile.str());
320+
break;
321+
}
322+
}
323+
}
324+
325+
if (LibSpirvFile.empty()) {
326+
getDriver().Diag(diag::err_drv_no_sycl_libspirv);
327+
return;
328+
}
329+
330+
CC1Args.push_back("-mlink-builtin-bitcode");
331+
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
332+
}
333+
278334
llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
279335
CC1Args.push_back("-mlink-builtin-bitcode");
280336
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
@@ -308,9 +364,22 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
308364

309365
Tool *HIPToolChain::buildLinker() const {
310366
assert(getTriple().getArch() == llvm::Triple::amdgcn);
367+
if (OK == Action::OFK_SYCL)
368+
return new tools::AMDGCN::SYCLLinker(*this);
311369
return new tools::AMDGCN::Linker(*this);
312370
}
313371

372+
Tool *HIPToolChain::SelectTool(const JobAction &JA) const {
373+
if (OK == Action::OFK_SYCL) {
374+
if (JA.getKind() == Action::LinkJobClass &&
375+
JA.getType() == types::TY_LLVM_BC) {
376+
return static_cast<tools::AMDGCN::SYCLLinker *>(ToolChain::SelectTool(JA))
377+
->GetSYCLToolChainLinker();
378+
}
379+
}
380+
return ToolChain::SelectTool(JA);
381+
}
382+
314383
void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
315384
HostTC.addClangWarningOptions(CC1Args);
316385
}

0 commit comments

Comments
 (0)