Skip to content

Commit b91b732

Browse files
[SYCL] Always inline kernel lambda operator in entry point (#6977)
This patch marks the `operator()` of the kernel lambda as `always_inline` so that it gets inlined into the kernel entry point. Kernel entry point are functions that take the captured variables as parameters, create a lambda object from that, setup the index structs and then call `operator()` on the lambda. Inlining the operator into the entry point should be beneficial in most cases as it allows the compiler to optimize out the lambda creation, which can be very important for kernels capturing a lot of variables. In a lot of cases the inliner will already do it, but when it doesn't it can lead to very confusing performance implications since the kernel entry point isn't directly visible to users. Because the always inliner runs very early this patch broke a number of lit tests that were checking for the operator function, I believe I've managed to fix most of them while maintaining the spirit of the test, but some reviews and/or suggestions on these would be appreciated. Co-authored-by: Steffen Larsen <steffen.larsen@intel.com>
1 parent 574891b commit b91b732

33 files changed

+93
-29
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA")
279279
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
280280
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
281281
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
282+
LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point")
282283
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
283284
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
284285
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2942,6 +2942,12 @@ defm sycl_unnamed_lambda
29422942
" >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>,
29432943
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
29442944
BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>;
2945+
defm sycl_force_inline_kernel_lambda
2946+
: BoolFOption<
2947+
"sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">,
2948+
DefaultTrue,
2949+
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
2950+
BothFlags<[CC1Option, CoreOption], " force inline SYCL kernels lambda in entry point">>;
29452951
def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
29462952
Flags<[NoXarchOption, CoreOption]>, HelpText<"Emit help information from the "
29472953
"related offline compilation tool. Valid values: all, fpga, gen, x86_64.">,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5123,6 +5123,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
51235123
CmdArgs.push_back("-sycl-std=2020");
51245124
}
51255125

5126+
if (!Args.hasFlag(options::OPT_fsycl_force_inline_kernel_lambda,
5127+
options::OPT_fno_sycl_force_inline_kernel_lambda, true))
5128+
CmdArgs.push_back("-fno-sycl-force-inline-kernel-lambda");
5129+
51265130
if (!Args.hasFlag(options::OPT_fsycl_unnamed_lambda,
51275131
options::OPT_fno_sycl_unnamed_lambda, true))
51285132
CmdArgs.push_back("-fno-sycl-unnamed-lambda");

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,18 @@ class SingleDeviceFunctionTracker {
837837
CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel);
838838
llvm::SmallVector<FunctionDecl *> CallStack;
839839
VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack);
840+
841+
// Always inline the KernelBody in the kernel entry point. For ESIMD
842+
// inlining is handled later down the pipeline.
843+
if (KernelBody &&
844+
Parent.SemaRef.getLangOpts().SYCLForceInlineKernelLambda &&
845+
!KernelBody->hasAttr<NoInlineAttr>() &&
846+
!KernelBody->hasAttr<AlwaysInlineAttr>() &&
847+
!KernelBody->hasAttr<SYCLSimdAttr>()) {
848+
KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit(
849+
KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword,
850+
AlwaysInlineAttr::Keyword_forceinline));
851+
}
840852
}
841853

842854
public:

clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],

clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
1+
// RUN: %clang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
22
//
33
// Verify the SYCL kernel routine is marked artificial and has the
44
// expected source correlation.

clang/test/CodeGenSYCL/device-functions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename T>
44
T bar(T arg);

clang/test/CodeGenSYCL/device-variables.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
enum class test_type { value1, value2, value3 };
44

clang/test/CodeGenSYCL/kernel-handler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
33

44
// This test checks IR generated when kernel_handler argument
55
// (used to handle SYCL 2020 specialization constants) is passed

clang/test/CodeGenSYCL/kernel_binding_decls.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
#include "sycl.hpp"
44

clang/test/CodeGenSYCL/max-concurrency.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
#include "sycl.hpp"
44

clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
#include "sycl.hpp"
44

clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
// This test checks that compiler generates correct kernel wrapper for basic
44
// case.

clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],

clang/test/CodeGenSYCL/no_opaque_device-functions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename T>
44
T bar(T arg);

clang/test/CodeGenSYCL/no_opaque_device-variables.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
enum class test_type { value1, value2, value3 };
44

clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
33

44
// This test checks IR generated when kernel_handler argument
55
// (used to handle SYCL 2020 specialization constants) is passed

clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
#include "sycl.hpp"
44

clang/test/CodeGenSYCL/no_opaque_sampler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
22
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
33
// CHECK-NEXT: entry:
44
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8

clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename name, typename Func>
44
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {

clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename name, typename Func>
44
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {

clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
44

clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
// This test checks a kernel argument that is union with both array and non-array fields.
44

clang/test/CodeGenSYCL/sampler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
22
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
33
// CHECK-NEXT: entry:
44
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8

clang/test/CodeGenSYCL/spir-calling-conv.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename name, typename Func>
44
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {

clang/test/CodeGenSYCL/spir-enum.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename name, typename Func>
44
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {

clang/test/CodeGenSYCL/stall_enable_device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
44

clang/test/CodeGenSYCL/sycl-device-static-init.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
22
// Test that static initializers do not force the emission of globals on sycl device
33

44
// CHECK-NOT: $_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = comdat any

clang/test/CodeGenSYCL/union-kernel-param.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
// This test checks a kernel argument that is union with both array and non-array fields.
44

clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
2-
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK
33

44
template<typename KN, typename Func>
55
__attribute__((sycl_kernel)) void kernel(Func F){

clang/test/Driver/sycl.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@
7272
// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-unnamed-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-LAMBDA
7373
// CHECK-NOT-LAMBDA: "-fno-sycl-unnamed-lambda"
7474

75+
// -fsycl-force-inline-kernel-lambda
76+
// RUN: %clangxx -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE
77+
// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE
78+
// CHECK-NOT-INLINE: "-fno-sycl-force-inline-kernel-lambda"
79+
7580
/// -fsycl-device-only triple checks
7681
// RUN: %clang -fsycl-device-only -target x86_64-unknown-linux-gnu -### %s 2>&1 \
7782
// RUN: | FileCheck --check-prefix=DEVICE-64 %s
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-INLINE
2+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLINE
3+
4+
#include "sycl.hpp"
5+
6+
int main() {
7+
sycl::queue q;
8+
9+
// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10KernelName()
10+
//
11+
// CHECK-NO-INLINE: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
12+
// CHECK-INLINE-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
13+
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelName>([] {}); });
14+
15+
16+
// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E16KernelNameInline()
17+
// CHECK-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv
18+
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameInline>([]() __attribute__((always_inline)) {}); });
19+
20+
// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E18KernelNameNoInline()
21+
// CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_ENKUlvE_clEv
22+
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameNoInline>([]() __attribute__((noinline)) {}); });
23+
24+
/// The flag is ignored for ESIMD kernels
25+
// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_E15KernelNameESIMD()
26+
// CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_ENKUlvE_clEv
27+
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameESIMD>([]() __attribute__((sycl_explicit_simd)) {}); });
28+
29+
return 0;
30+
}

sycl/doc/UsersManual.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ and not recommended to use in production environment.
107107
* nd_item class get_global_id()/get_global_linear_id() member functions
108108
Enabled by default.
109109

110+
**`-f[no-]sycl-force-inline-kernel-lambda`**
111+
112+
Enables/Disables inlining of the kernel lambda operator into the compiler
113+
generated entry point function. This flag does not apply to ESIMD
114+
kernels.
115+
Enabled by default.
110116

111117
**`-fgpu-inline-threshold=<n>`**
112118

0 commit comments

Comments
 (0)