Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Propagate attributes of original kernel to wrapper kernel generated for range-rounding #3306

Merged
merged 4 commits into from
Mar 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3195,6 +3195,9 @@ def warn_dllimport_dropped_from_inline_function : Warning<
InGroup<IgnoredAttributes>;
def warn_attribute_ignored : Warning<"%0 attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on a function directly called from a SYCL kernel function; attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -13017,6 +13017,7 @@ class Sema final {

bool isKnownGoodSYCLDecl(const Decl *D);
void checkSYCLDeviceVarDecl(VarDecl *Var);
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj);
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
void MarkDevice();

Expand Down
147 changes: 101 additions & 46 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,37 @@ static int64_t getIntExprValue(const Expr *E, ASTContext &Ctx) {
return E->getIntegerConstantExpr(Ctx)->getSExtValue();
}

// Collect function attributes related to SYCL.
static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
AaronBallman marked this conversation as resolved.
Show resolved Hide resolved
llvm::SmallVector<Attr *, 4> &Attrs,
bool DirectlyCalled = true) {
if (!FD->hasAttrs())
return;

llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
// FIXME: Make this list self-adapt as new SYCL attributes are added.
return isa<IntelReqdSubGroupSizeAttr, ReqdWorkGroupSizeAttr,
AaronBallman marked this conversation as resolved.
Show resolved Hide resolved
SYCLIntelKernelArgsRestrictAttr, SYCLIntelNumSimdWorkItemsAttr,
SYCLIntelSchedulerTargetFmaxMhzAttr,
SYCLIntelMaxWorkGroupSizeAttr, SYCLIntelMaxGlobalWorkDimAttr,
SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A);
});

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects called directly from a kernel.
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
AaronBallman marked this conversation as resolved.
Show resolved Hide resolved
if (DirectlyCalled) {
Attrs.push_back(A);
} else {
S.Diag(A->getLocation(),
diag::warn_attribute_on_direct_kernel_callee_only)
<< A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
}

class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Used to keep track of the constexpr depth, so we know whether to skip
// diagnostics.
Expand Down Expand Up @@ -477,7 +508,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Returns the kernel body function found during traversal.
FunctionDecl *
CollectPossibleKernelAttributes(FunctionDecl *SYCLKernel,
llvm::SmallPtrSet<Attr *, 4> &Attrs) {
llvm::SmallVector<Attr *, 4> &Attrs) {
typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
Expand Down Expand Up @@ -508,55 +539,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
"function can be called");
KernelBody = FD;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

if (auto *A = FD->getAttr<IntelReqdSubGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelKernelArgsRestrictAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNumSimdWorkItemsAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelSchedulerTargetFmaxMhzAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNoGlobalWorkOffsetAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLSimdAttr>())
Attrs.insert(A);

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects that are called directly from a kernel
// (i.e. the one passed to the single_task or parallel_for functions).
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
} else {
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
// Gather all attributes of FD that are SYCL related.
// Some attributes are allowed only on lambda functions and function
// objects called directly from a kernel (i.e. the one passed to the
// single_task or parallel_for functions).
bool DirectlyCalled = (ParentFD == SYCLKernel);
collectSYCLAttributes(SemaRef, FD, Attrs, DirectlyCalled);

// Attribute "loop_fuse" can be applied explicitly on kernel function.
// Attribute should not be propagated from device functions to kernel.
if (auto *A = FD->getAttr<SYCLIntelLoopFuseAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
Attrs.push_back(A);
}
}

Expand Down Expand Up @@ -2058,8 +2057,8 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler {
using SyclKernelFieldHandler::handleSyclHalfType;
};

static const CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec) {
for (const auto *MD : Rec->methods()) {
static CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec) {
for (auto *MD : Rec->methods()) {
if (MD->getOverloadedOperator() == OO_Call)
return MD;
}
Expand Down Expand Up @@ -3149,6 +3148,56 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
KernelFunc->setInvalidDecl();
}

// For a wrapped parallel_for, copy attributes from original
// kernel to wrapped kernel.
void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) {
AaronBallman marked this conversation as resolved.
Show resolved Hide resolved
// Get the operator() function of the wrapper.
CXXMethodDecl *OpParens = getOperatorParens(KernelObj);
assert(OpParens && "invalid kernel object");

typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
WorkList.push_back({OpParens, nullptr});
FunctionDecl *KernelBody = nullptr;

CallGraph SYCLCG;
SYCLCG.addToCallGraph(getASTContext().getTranslationUnitDecl());
while (!WorkList.empty()) {
FunctionDecl *FD = WorkList.back().first;
FunctionDecl *ParentFD = WorkList.back().second;

if ((ParentFD == OpParens) && isSYCLKernelBodyFunction(FD)) {
KernelBody = FD;
break;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

CallGraphNode *N = SYCLCG.getNode(FD);
if (!N)
continue;

for (const CallGraphNode *CI : *N) {
if (auto *Callee = dyn_cast<FunctionDecl>(CI->getDecl())) {
Callee = Callee->getMostRecentDecl();
if (!Visited.count(Callee))
WorkList.push_back({Callee, FD});
}
}
}

assert(KernelBody && "improper parallel_for wrap");
if (KernelBody) {
llvm::SmallVector<Attr *, 4> Attrs;
collectSYCLAttributes(*this, KernelBody, Attrs);
if (!Attrs.empty())
llvm::for_each(Attrs, [OpParens](Attr *A) { OpParens->addAttr(A); });
}
}

// Generates the OpenCL kernel using KernelCallerFunc (kernel caller
// function) defined is SYCL headers.
// Generated OpenCL kernel contains the body of the kernel caller function,
Expand Down Expand Up @@ -3181,14 +3230,20 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc,
if (KernelObj->isInvalidDecl())
return;

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

// Calculate both names, since Integration headers need both.
std::string CalculatedName, StableName;
std::tie(CalculatedName, StableName) =
constructKernelName(*this, KernelCallerFunc, MC);
StringRef KernelName(getLangOpts().SYCLUnnamedLambda ? StableName
: CalculatedName);

// Attributes of a user-written SYCL kernel must be copied to the internally
// generated alternative kernel, identified by a known string in its name.
if (StableName.find("__pf_kernel_wrapper") != std::string::npos)
copySYCLKernelAttrs(KernelObj);

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(),
KernelCallerFunc->isInlined(),
IsSIMDKernel);
Expand Down Expand Up @@ -3226,7 +3281,7 @@ void Sema::MarkDevice(void) {
Marker.CollectKernelSet(SYCLKernel, SYCLKernel, VisitedSet);

// Let's propagate attributes from device functions to a SYCL kernels
llvm::SmallPtrSet<Attr *, 4> Attrs;
llvm::SmallVector<Attr *, 4> Attrs;
// This function collects all kernel attributes which might be applied to
// a device functions, but need to be propagated down to callers, i.e.
// SYCL kernels
Expand Down
23 changes: 23 additions & 0 deletions clang/test/SemaSYCL/Inputs/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,24 @@ template <typename Type>
struct get_kernel_name_t<auto_name, Type> {
using name = Type;
};

// Used when parallel_for range is rounded-up.
template <typename Type> class __pf_kernel_wrapper;

template <typename Type> struct get_kernel_wrapper_name_t {
using name =
__pf_kernel_wrapper<typename get_kernel_name_t<auto_name, Type>::name>;
};

#define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))
template <typename KernelName = auto_name, typename KernelType>
ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc) {
kernelFunc();
}
template <typename KernelName = auto_name, typename KernelType>
ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) {
kernelFunc();
}
class handler {
public:
template <typename KernelName = auto_name, typename KernelType>
Expand All @@ -220,6 +233,16 @@ class handler {
kernel_single_task<NameT>(kernelFunc);
#else
kernelFunc();
#endif
}
template <typename KernelName = auto_name, typename KernelType>
void parallel_for(const KernelType &kernelObj) {
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
using NameWT = typename get_kernel_wrapper_name_t<NameT>::name;
#ifdef __SYCL_DEVICE_ONLY__
kernel_parallel_for<NameT>(kernelObj);
#else
kernelObj();
#endif
}
};
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/args-size-overflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ queue q;
using Accessor =
accessor<int, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>;
#ifdef SPIR64
// expected-warning@Inputs/sycl.hpp:220 {{size of kernel arguments (7994 bytes) may exceed the supported maximum of 2048 bytes on some devices}}
// expected-warning@Inputs/sycl.hpp:233 {{size of kernel arguments (7994 bytes) may exceed the supported maximum of 2048 bytes on some devices}}
#elif SPIR32
// expected-warning@Inputs/sycl.hpp:220 {{size of kernel arguments (7986 bytes) may exceed the supported maximum of 2048 bytes on some devices}}
// expected-warning@Inputs/sycl.hpp:233 {{size of kernel arguments (7986 bytes) may exceed the supported maximum of 2048 bytes on some devices}}
#endif

void use() {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/deferred-diagnostics-aux-builtin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ int main(int argc, char **argv) {
_mm_prefetch("test", 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 {{called by 'kernel_single_task<AName, (lambda}}
// expected-note@Inputs/sycl.hpp:221 {{called by 'kernel_single_task<AName, (lambda}}
h.single_task<class AName>([]() {
_mm_prefetch("test", 4); // expected-error {{builtin is not supported on this target}}
_mm_prefetch("test", 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} expected-error {{builtin is not supported on this target}}
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/deferred-diagnostics-emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ template <typename T>
void setup_sycl_operation(const T VA[]) {

deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 {{called by 'kernel_single_task<AName, (lambda}}
// expected-note@Inputs/sycl.hpp:221 {{called by 'kernel_single_task<AName, (lambda}}
h.single_task<class AName>([]() {
// ======= Zero Length Arrays Not Allowed in Kernel ==========
// expected-error@+1 {{zero-length arrays are not permitted in C++}}
Expand Down Expand Up @@ -156,7 +156,7 @@ int main(int argc, char **argv) {

// --- direct lambda testing ---
deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 2 {{called by 'kernel_single_task<AName, (lambda}}
// expected-note@Inputs/sycl.hpp:221 2 {{called by 'kernel_single_task<AName, (lambda}}
h.single_task<class AName>([]() {
// expected-error@+1 {{zero-length arrays are not permitted in C++}}
int BadArray[0];
Expand Down
6 changes: 3 additions & 3 deletions clang/test/SemaSYCL/float128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ int main() {
__float128 CapturedToDevice = 1;
host_ok();
deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 {{called by 'kernel_single_task<variables, (lambda}}
// expected-note@Inputs/sycl.hpp:221 {{called by 'kernel_single_task<variables, (lambda}}
h.single_task<class variables>([=]() {
// expected-error@+1 {{'__float128' is not supported on this target}}
decltype(CapturedToDevice) D;
Expand All @@ -88,7 +88,7 @@ int main() {
});

deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 4{{called by 'kernel_single_task<functions, (lambda}}
// expected-note@Inputs/sycl.hpp:221 4{{called by 'kernel_single_task<functions, (lambda}}
h.single_task<class functions>([=]() {
// expected-note@+1 2{{called by 'operator()'}}
usage();
Expand All @@ -104,7 +104,7 @@ int main() {
});

deviceQueue.submit([&](sycl::handler &h) {
// expected-note@Inputs/sycl.hpp:212 {{called by 'kernel_single_task<ok, (lambda}}
// expected-note@Inputs/sycl.hpp:221 {{called by 'kernel_single_task<ok, (lambda}}
h.single_task<class ok>([=]() {
// expected-note@+1 3{{used here}}
Z<__float128> S;
Expand Down
16 changes: 8 additions & 8 deletions clang/test/SemaSYCL/implicit_kernel_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ int main() {
queue q;

#if defined(WARN)
// expected-error@Inputs/sycl.hpp:220 {{'InvalidKernelName1' is an invalid kernel name type}}
// expected-note@Inputs/sycl.hpp:220 {{'InvalidKernelName1' should be globally-visible}}
// expected-error@Inputs/sycl.hpp:233 {{'InvalidKernelName1' is an invalid kernel name type}}
// expected-note@Inputs/sycl.hpp:233 {{'InvalidKernelName1' should be globally-visible}}
// expected-note@+8 {{in instantiation of function template specialization}}
#elif defined(ERROR)
// expected-error@Inputs/sycl.hpp:220 {{'InvalidKernelName1' is an invalid kernel name type}}
// expected-note@Inputs/sycl.hpp:220 {{'InvalidKernelName1' should be globally-visible}}
// expected-error@Inputs/sycl.hpp:233 {{'InvalidKernelName1' is an invalid kernel name type}}
// expected-note@Inputs/sycl.hpp:233 {{'InvalidKernelName1' should be globally-visible}}
// expected-note@+4 {{in instantiation of function template specialization}}
#endif
class InvalidKernelName1 {};
Expand All @@ -39,9 +39,9 @@ int main() {
});

#if defined(WARN)
// expected-warning@Inputs/sycl.hpp:220 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
// expected-warning@Inputs/sycl.hpp:233 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
#elif defined(ERROR)
// expected-error@Inputs/sycl.hpp:220 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
// expected-error@Inputs/sycl.hpp:233 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
#endif

q.submit([&](handler &h) {
Expand All @@ -53,9 +53,9 @@ int main() {
});

#if defined(WARN)
// expected-warning@Inputs/sycl.hpp:220 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
// expected-warning@Inputs/sycl.hpp:233 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
#elif defined(ERROR)
// expected-error@Inputs/sycl.hpp:220 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
// expected-error@Inputs/sycl.hpp:233 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}}
#endif

q.submit([&](handler &h) {
Expand Down
Loading