Skip to content

[SYCL] Always inline kernel lambda operator in entry point #6977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA")
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point")
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2942,6 +2942,12 @@ defm sycl_unnamed_lambda
" >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>,
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>;
defm sycl_force_inline_kernel_lambda
: BoolFOption<
"sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">,
DefaultTrue,
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
BothFlags<[CC1Option, CoreOption], " force inline SYCL kernels lambda in entry point">>;
def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
Flags<[NoXarchOption, CoreOption]>, HelpText<"Emit help information from the "
"related offline compilation tool. Valid values: all, fpga, gen, x86_64.">,
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5123,6 +5123,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-sycl-std=2020");
}

if (!Args.hasFlag(options::OPT_fsycl_force_inline_kernel_lambda,
options::OPT_fno_sycl_force_inline_kernel_lambda, true))
CmdArgs.push_back("-fno-sycl-force-inline-kernel-lambda");

if (!Args.hasFlag(options::OPT_fsycl_unnamed_lambda,
options::OPT_fno_sycl_unnamed_lambda, true))
CmdArgs.push_back("-fno-sycl-unnamed-lambda");
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,18 @@ class SingleDeviceFunctionTracker {
CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel);
llvm::SmallVector<FunctionDecl *> CallStack;
VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack);

// Always inline the KernelBody in the kernel entry point. For ESIMD
// inlining is handled later down the pipeline.
if (KernelBody &&
Parent.SemaRef.getLangOpts().SYCLForceInlineKernelLambda &&
!KernelBody->hasAttr<NoInlineAttr>() &&
!KernelBody->hasAttr<AlwaysInlineAttr>() &&
!KernelBody->hasAttr<SYCLSimdAttr>()) {
KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit(
KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword,
AlwaysInlineAttr::Keyword_forceinline));
}
}

public:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
// RUN: %clang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
//
// Verify the SYCL kernel routine is marked artificial and has the
// expected source correlation.
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/device-functions.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename T>
T bar(T arg);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/device-variables.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

enum class test_type { value1, value2, value3 };

Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT

// This test checks IR generated when kernel_handler argument
// (used to handle SYCL 2020 specialization constants) is passed
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/kernel_binding_decls.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

#include "sycl.hpp"

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

#include "sycl.hpp"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

// This test checks that compiler generates correct kernel wrapper for basic
// case.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_device-functions.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename T>
T bar(T arg);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_device-variables.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

enum class test_type { value1, value2, value3 };

Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT

// This test checks IR generated when kernel_handler argument
// (used to handle SYCL 2020 specialization constants) is passed
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s

// This test checks a kernel argument that is union with both array and non-array fields.

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/spir-calling-conv.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/spir-enum.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/stall_enable_device.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-device-static-init.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
// Test that static initializers do not force the emission of globals on sycl device

// CHECK-NOT: $_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = comdat any
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/union-kernel-param.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s

// This test checks a kernel argument that is union with both array and non-array fields.

Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK

template<typename KN, typename Func>
__attribute__((sycl_kernel)) void kernel(Func F){
Expand Down
5 changes: 5 additions & 0 deletions clang/test/Driver/sycl.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@
// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-unnamed-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-LAMBDA
// CHECK-NOT-LAMBDA: "-fno-sycl-unnamed-lambda"

// -fsycl-force-inline-kernel-lambda
// RUN: %clangxx -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE
// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE
// CHECK-NOT-INLINE: "-fno-sycl-force-inline-kernel-lambda"

/// -fsycl-device-only triple checks
// RUN: %clang -fsycl-device-only -target x86_64-unknown-linux-gnu -### %s 2>&1 \
// RUN: | FileCheck --check-prefix=DEVICE-64 %s
Expand Down
30 changes: 30 additions & 0 deletions clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-INLINE
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLINE

#include "sycl.hpp"

int main() {
sycl::queue q;

// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10KernelName()
//
// CHECK-NO-INLINE: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
// CHECK-INLINE-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelName>([] {}); });


// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E16KernelNameInline()
// CHECK-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameInline>([]() __attribute__((always_inline)) {}); });

// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E18KernelNameNoInline()
// CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_ENKUlvE_clEv
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameNoInline>([]() __attribute__((noinline)) {}); });

/// The flag is ignored for ESIMD kernels
// CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_E15KernelNameESIMD()
// CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_ENKUlvE_clEv
q.submit([&](sycl::handler &h) { h.parallel_for<class KernelNameESIMD>([]() __attribute__((sycl_explicit_simd)) {}); });

return 0;
}
6 changes: 6 additions & 0 deletions sycl/doc/UsersManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ and not recommended to use in production environment.
* nd_item class get_global_id()/get_global_linear_id() member functions
Enabled by default.

**`-f[no-]sycl-force-inline-kernel-lambda`**

Enables/Disables inlining of the kernel lambda operator into the compiler
generated entry point function. This flag does not apply to ESIMD
kernels.
Enabled by default.

**`-fgpu-inline-threshold=<n>`**

Expand Down