Skip to content

[clang][SYCL] Do not decompose SYCL functors unless necessary #18258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels")
LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor")
LANGOPT(SYCLDecomposeStruct, 1, 0, "Force top level decomposition of SYCL functor")
LANGOPT(
SYCLValueFitInMaxInt, 1, 1,
"SYCL compiler assumes value fits within MAX_INT for member function of "
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -7184,11 +7184,11 @@ defm sycl_instrument_device_code
"(experimental)">>;
defm sycl_decompose_functor
: BoolFOption<"sycl-decompose-functor",
LangOpts<"SYCLDecomposeStruct">, DefaultTrue,
LangOpts<"SYCLDecomposeStruct">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CLOption], "Do">,
NegFlag<SetFalse, [], [ClangOption, CLOption], "Do not">,
BothFlags<[], [ClangOption, CLOption, CC1Option],
" decompose SYCL functor if possible (experimental, CUDA only)">>;
" decompose SYCL functor if possible (default is false)">>;
defm sycl_cuda_compat
: BoolFOption<"sycl-cuda-compatibility", LangOpts<"SYCLCUDACompat">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CLOption, CC1Option], "Enable CUDA compatibility mode (experimental). "
Expand Down
20 changes: 7 additions & 13 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,13 +318,6 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc,
SYCLBuiltinBaseTypeExpr(Loc, SourceTy, Idx, BaseTy);
}

/// Returns true if the target requires a new type.
/// This happens if a pointer to generic cannot be passed
static bool targetRequiresNewType(ASTContext &Context) {
llvm::Triple T = Context.getTargetInfo().getTriple();
return !T.isNVPTX();
}

// This information is from Section 4.13 of the SYCL spec
// https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf
// This function returns false if the math lib function
Expand Down Expand Up @@ -1570,13 +1563,14 @@ class KernelObjVisitor {
public:
KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {}

static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) {
static bool useTopLevelKernelObj(SemaSYCL &SemaSYCLRef,
const CXXRecordDecl *KernelObj) {
// If the kernel is empty, "decompose" it so we don't generate arguments.
if (KernelObj->isEmpty())
return false;
// FIXME: Workaround to not change large number of tests
// this is covered by the test below.
if (targetRequiresNewType(KernelObj->getASTContext()))
if (SemaSYCLRef.getLangOpts().SYCLDecomposeStruct)
return false;
if (KernelObj->hasAttr<SYCLRequiresDecompositionAttr>() ||
KernelObj->hasAttr<SYCLGenerateNewTypeAttr>())
Expand Down Expand Up @@ -1615,7 +1609,7 @@ class KernelObjVisitor {
template <typename... HandlerTys>
void VisitKernelRecord(const CXXRecordDecl *KernelObj,
QualType KernelFunctorTy, HandlerTys &...Handlers) {
if (!useTopLevelKernelObj(KernelObj)) {
if (!useTopLevelKernelObj(SemaSYCLRef, KernelObj)) {
VisitRecordBases(KernelObj, Handlers...);
VisitRecordFields(KernelObj, Handlers...);
} else {
Expand Down Expand Up @@ -2297,12 +2291,12 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler {
}

bool handlePointerType(FieldDecl *, QualType) final {
PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext());
PointerStack.back() = SemaSYCLRef.getLangOpts().SYCLDecomposeStruct;
return true;
}

bool handlePointerType(ParmVarDecl *, QualType) final {
PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext());
PointerStack.back() = SemaSYCLRef.getLangOpts().SYCLDecomposeStruct;
return true;
}

Expand Down Expand Up @@ -4149,7 +4143,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
FunctionDecl *KernelCallerFunc, bool IsSIMDKernel,
CXXMethodDecl *CallOperator)
: SyclKernelFieldHandler(S),
UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(KernelObj)),
UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(S, KernelObj)),
DeclCreator(DC),
KernelObjClone(UseTopLevelKernelObj
? nullptr
Expand Down
8 changes: 3 additions & 5 deletions clang/test/CodeGenSYCL/bool-kernel-argument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ int main() {
bool test = false;
sycl::queue q;

// CHECK: @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11test_kernel(i8 {{.*}} [[ARG:%[A-Za-z_0-9]*]]
// CHECK: %__SYCLKernel = alloca
// CHECK: %test = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %__SYCLKernel.ascast
// CHECK: store i8 %{{.*}}, ptr addrspace(4) %test
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
// CHECK: @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11test_kernel(ptr {{.*}} [[ARG:%[A-Za-z_0-9]*]]
// CHECK: [[ARG_CAST:%[A-Za-z_0-9.]*]] = addrspacecast ptr [[ARG]] to ptr addrspace(4)
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv({{.*}} [[ARG_CAST]])
//
// CHECK: define {{.*}} @_Z9take_boolb(i1
q.submit([&](sycl::handler &h) {
Expand Down
10 changes: 3 additions & 7 deletions clang/test/CodeGenSYCL/free_function_kernel_params.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -triple spir64 \
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-decompose-functor -triple spir64 \
// RUN: -emit-llvm %s -o - | FileCheck %s
// This test checks parameter IR generation for free functions with parameters
// of non-decomposed struct type, work group memory type, dynamic work group memory type
Expand Down Expand Up @@ -50,13 +50,9 @@ template void ff_6(KArgWithPtrArray<TestArrSize> KArg);
// CHECK: %struct.NoPointers = type { i32 }
// CHECK: %struct.Pointers = type { ptr addrspace(4), ptr addrspace(4) }
// CHECK: %struct.Agg = type { %struct.NoPointers, i32, ptr addrspace(4), %struct.Pointers }
// CHECK: %struct.__generated_Pointers = type { ptr addrspace(1), ptr addrspace(1) }
// CHECK: %struct.__generated_Agg = type { %struct.NoPointers, i32, ptr addrspace(1), %struct.__generated_Pointers.4 }
// CHECK: %struct.__generated_Pointers.4 = type { ptr addrspace(1), ptr addrspace(1) }
// CHECK: %struct.__generated_KArgWithPtrArray = type { [3 x ptr addrspace(1)], [3 x i32], [3 x i32] }
// CHECK: %struct.KArgWithPtrArray = type { [3 x ptr addrspace(4)], [3 x i32], [3 x i32] }
// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel{{.*}}(ptr noundef byval(%struct.NoPointers) align 4 %__arg_S1, ptr noundef byval(%struct.__generated_Pointers) align 8 %__arg_S2, ptr noundef byval(%struct.__generated_Agg) align 8 %__arg_S3)
// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_6{{.*}}(ptr noundef byval(%struct.__generated_KArgWithPtrArray) align 8 %__arg_KArg)
// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel{{.*}}(ptr noundef byval(%struct.NoPointers) align 4 %__arg_S1, ptr noundef byval(%struct.Pointers) align 8 %__arg_S2, ptr noundef byval(%struct.Agg) align 8 %__arg_S3)
// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_6{{.*}}(ptr noundef byval(%struct.KArgWithPtrArray) align 8 %__arg_KArg)

__attribute__((sycl_device))
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]]
Expand Down
30 changes: 5 additions & 25 deletions clang/test/CodeGenSYCL/generated-types-initialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,14 @@ int main() {
});
return 0;
}
// CHECK: define dso_local spir_kernel void @{{.*}}basic(ptr noundef byval(%struct.__generated_B) align 8 %_arg_Obj)
//
// Kernel object clone.
// CHECK: %[[K:[a-zA-Z0-9_.]+]] = alloca %class.anon
// CHECK: %[[K_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[K]] to ptr addrspace(4)
//
// Argument reference.
// CHECK: %[[Arg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_Obj to ptr addrspace(4)
//
// Initialization.
// CHECK: %[[GEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %[[K_as_cast]], i32 0, i32 0
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[GEP]], ptr addrspace(4) align 8 %[[Arg_ref]], i64 16, i1 false)
// CHECK: define dso_local spir_kernel void @{{.*}}basic(ptr noundef byval(%class.anon) align 8 %_arg__sycl_functor)
//
// Kernel body call.
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[K_as_cast]])
// CHECK: %[[Obj_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__sycl_functor to ptr addrspace(4)
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[Obj_as_cast]])

// CHECK: define dso_local spir_kernel void @{{.*}}nns(ptr noundef byval(%struct.__generated_B.0) align 8 %_arg_NNSObj)
//
// Kernel object clone.
// CHECK: %[[NNSK:[a-zA-Z0-9_.]+]] = alloca %class.anon.2
// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[NNSK]] to ptr addrspace(4)
//
// Argument reference.
// CHECK: %[[NNSArg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_NNSObj to ptr addrspace(4)
//
// Initialization.
// CHECK: %[[NNSGEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds nuw %class.anon.2, ptr addrspace(4) %[[NNSK_as_cast]], i32 0, i32 0
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[NNSGEP]], ptr addrspace(4) align 8 %[[NNSArg_ref]], i64 16, i1 false)
// CHECK: define dso_local spir_kernel void @{{.*}}nns(ptr noundef byval(%class.anon.0) align 8 %_arg__sycl_functor)
//
// Kernel body call.
// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__sycl_functor to ptr addrspace(4)
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[NNSK_as_cast]])
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/infer-address-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ void foo(const float *usm_in, float* usm_out) {

// No addrspacecast before loading and storing values
// CHECK-NOT: addrspacecast
// CHECK: [[VAL:%.*]] = load float, ptr addrspace(1)
// CHECK: store float [[VAL]], ptr addrspace(1)
// CHECK: [[VAL:%.*]] = load float, ptr addrspace(4)
// CHECK: store float [[VAL]], ptr addrspace(4)
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/inheritance.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s

#include "Inputs/sycl.hpp"

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/intel-restrict.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
Expand Down
5 changes: 2 additions & 3 deletions clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,13 @@ int main() {

// Check kernel_C parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_C
// CHECK-SAME: i32 noundef [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-SAME: ptr noundef byval(%class.anon.3) align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-NOT: kernel_arg_runtime_aligned
// CHECK-NOT: kernel_arg_exclusive_ptr

// Check usm_ptr parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}usm_ptr
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]],
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-SAME: ptr noundef byval(%class.anon.4) align 8 [[MEM_ARG1:%[a-zA-Z0-9_]+]]
// CHECK-NOT: kernel_arg_runtime_aligned
// CHECK-NOT: kernel_arg_exclusive_ptr

Expand Down
3 changes: 2 additions & 1 deletion clang/test/CodeGenSYCL/kernel-device-space-arg.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm %s -disable-llvm-passes -o - | FileCheck %s

// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function(ptr addrspace(5) {{.*}} ptr addrspace(6) {{.*}}
// CHECK: %class.anon = type { ptr addrspace(5), ptr addrspace(6) }
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function(ptr noundef byval(%class.anon) {{.*}}

#include "Inputs/sycl.hpp"

Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT

// This test checks IR generated when kernel_handler argument
// (used to handle SYCL 2020 specialization constants) is passed
Expand Down
82 changes: 3 additions & 79 deletions clang/test/CodeGenSYCL/kernel-param-pod-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,88 +46,12 @@ int main() {

// Check kernel_B parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_B
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0
// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x i32], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0
// CHECK: br label %[[ARRAYINITBODY:.+]]

// The loop body itself
// CHECK: [[ARRAYINITBODY]]:
// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITBODY]] ]
// CHECK: %[[TARG_ARRAY_ELEM:.+]] = getelementptr inbounds i32, ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]]
// CHECK: %[[SRC_ELEM:.+]] = getelementptr inbounds nuw [2 x i32], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]]
// CHECK: %[[SRC_VAL:.+]] = load i32, ptr addrspace(4) %[[SRC_ELEM]]
// CHECK: store i32 %[[SRC_VAL]], ptr addrspace(4) %[[TARG_ARRAY_ELEM]]
// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1
// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2
// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]]
// CHECK-SAME:(ptr noundef byval(%class.anon) align 4 %[[ARR_ARG:.*]])

// Check kernel_C parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_C
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class{{.*}}, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0
// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x %struct{{.*}}.foo], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0
// CHECK: br label %[[ARRAYINITBODY:.+]]

// The loop body itself
// CHECK: [[ARRAYINITBODY]]:
// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITBODY]] ]
// CHECK: %[[TARG_ARRAY_ELEM:.+]] = getelementptr inbounds %struct{{.*}}.foo, ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]]
// CHECK: %[[SRC_ELEM:.+]] = getelementptr inbounds nuw [2 x %struct{{.*}}.foo], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]]
// call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %[[TARG_ARRAY_ELEM]], ptr addrspace(4) align %[[SRC_ELEM]], i64 24, i1 false)
// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1
// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2
// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]]
// CHECK-SAME:(ptr noundef byval(%class.anon.0) align 4 %[[ARR_ARG:.*]])

// Check kernel_D parameters
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_D
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class{{.*}}, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0
// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0
// CHECK: br label %[[ARRAYINITBODY:.+]]

// Check Outer loop.
// CHECK: [[ARRAYINITBODY]]:
// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITEND:.+]] ]
// CHECK: %[[TARG_OUTER_ELEM:.+]] = getelementptr inbounds [1 x i32], ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]]
// CHECK: %[[SRC_OUTER_ELEM:.+]] = getelementptr inbounds nuw [2 x [1 x i32]], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]]
// CHECK: %[[ARRAY_BEGIN_INNER:.+]] = getelementptr inbounds [1 x i32], ptr addrspace(4) %[[TARG_OUTER_ELEM]], i64 0, i64 0
// CHECK: br label %[[ARRAYINITBODY_INNER:.+]]

// Check Inner Loop
// CHECK: [[ARRAYINITBODY_INNER]]:
// CHECK: %[[ARRAYINDEX_INNER:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX_INNER:.+]], %[[ARRAYINITBODY_INNER:.+]] ]
// CHECK: %[[TARG_INNER_ELEM:.+]] = getelementptr inbounds i32, ptr addrspace(4) %[[ARRAY_BEGIN_INNER]], i64 %[[ARRAYINDEX_INNER]]
// CHECK: %[[SRC_INNER_ELEM:.+]] = getelementptr inbounds nuw [1 x i32], ptr addrspace(4) %[[SRC_OUTER_ELEM]], i64 0, i64 %[[ARRAYINDEX_INNER]]
// CHECK: %[[SRC_LOAD:.+]] = load i32, ptr addrspace(4) %[[SRC_INNER_ELEM]]
// CHECK: store i32 %[[SRC_LOAD]], ptr addrspace(4) %[[TARG_INNER_ELEM]]
// CHECK: %[[NEXTINDEX_INNER]] = add nuw i64 %[[ARRAYINDEX_INNER]], 1
// CHECK: %[[ISDONE_INNER:.+]] = icmp eq i64 %[[NEXTINDEX_INNER]], 1
// CHECK: br i1 %[[ISDONE_INNER]], label %[[ARRAYINITEND]], label %[[ARRAYINITBODY_INNER]]

// Check Inner loop 'end'
// CHECK: [[ARRAYINITEND]]:
// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1
// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2
// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]]
// CHECK-SAME:(ptr noundef byval(%class.anon.1) align 4 %[[ARR_ARG:.*]])
Loading
Loading