From 7d9b5f5a342c0ec588ff9d9b2e6f30d0cc3c4204 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Thu, 13 Oct 2022 02:12:55 +0200 Subject: [PATCH] [SYCL] Fix crash due to incorrect `ReinterpretCastExpr` generation (#7030) When creating `ReinterpretCastExpr` use `getTrivialTypeSourceInfo` instead of `CreateTypeSourceInfo` since `CreateTypeSourceInfo` doesn't initialize underlying memory. The crash appeared during recursive AST visitors walks if kernel argument type had a nested name specifier. --- clang/lib/Sema/SemaSYCL.cpp | 2 +- .../generated-types-initialization.cpp | 71 +++++++++++++++++ ...-opaque-generated-types-initialization.cpp | 77 +++++++++++++++++++ .../SemaSYCL/built-in-type-kernel-arg.cpp | 30 ++++++++ clang/test/SemaSYCL/decomposition.cpp | 10 +++ 5 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenSYCL/generated-types-initialization.cpp create mode 100644 clang/test/CodeGenSYCL/no-opaque-generated-types-initialization.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 81c8867373e19..73a61582f20f5 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2959,7 +2959,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { Expr *createReinterpretCastExpr(Expr *E, QualType To) { return CXXReinterpretCastExpr::Create( SemaRef.Context, To, VK_PRValue, CK_BitCast, E, - /*Path=*/nullptr, SemaRef.Context.CreateTypeSourceInfo(To), + /*Path=*/nullptr, SemaRef.Context.getTrivialTypeSourceInfo(To), SourceLocation(), SourceLocation(), SourceRange()); } diff --git a/clang/test/CodeGenSYCL/generated-types-initialization.cpp b/clang/test/CodeGenSYCL/generated-types-initialization.cpp new file mode 100644 index 0000000000000..b6e3e160ac2dd --- /dev/null +++ b/clang/test/CodeGenSYCL/generated-types-initialization.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s + +// This test checks that compiler generates correct code when kernel arguments +// are structs that contain pointers but not decomposed. + +#include "sycl.hpp" + +struct A { + float *F; +}; + +struct B { + int *F1; + A F3; + B(int *I, A AA) : F1(I), F3(AA) {}; +}; + +struct Nested { + typedef B TDA; +}; + +int main() { + sycl::queue q; + B Obj{nullptr, {nullptr}}; + + q.submit([&](sycl::handler &h) { + h.single_task( + [=]() { + (void)Obj; + }); + }); + + Nested::TDA NNSObj{nullptr, {nullptr}}; + q.submit([&](sycl::handler &h) { + h.single_task([=]() { + (void)NNSObj; + }); + }); + return 0; +} +// CHECK: define dso_local spir_kernel void @{{.*}}basic(ptr noundef byval(%struct.__generated_B) align 8 %_arg_Obj) +// +// Kernel object clone. +// CHECK: %[[K:[a-zA-Z0-9_.]+]] = alloca %class.anon +// CHECK: %[[K_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[K]] to ptr addrspace(4) +// +// Argument reference. +// CHECK: %[[Arg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_Obj to ptr addrspace(4) +// +// Initialization. +// CHECK: %[[GEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %[[K_as_cast]], i32 0, i32 0 +// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[GEP]], ptr addrspace(4) align 8 %[[Arg_ref]], i64 16, i1 false) +// +// Kernel body call. +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[K_as_cast]]) + +// CHECK: define dso_local spir_kernel void @{{.*}}nns(ptr noundef byval(%struct.__generated_B.0) align 8 %_arg_NNSObj) +// +// Kernel object clone. +// CHECK: %[[NNSK:[a-zA-Z0-9_.]+]] = alloca %class.anon.2 +// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[NNSK]] to ptr addrspace(4) +// +// Argument reference. +// CHECK: %[[NNSArg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_NNSObj to ptr addrspace(4) +// +// Initialization. +// CHECK: %[[NNSGEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds %class.anon.2, ptr addrspace(4) %[[NNSK_as_cast]], i32 0, i32 0 +// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[NNSGEP]], ptr addrspace(4) align 8 %[[NNSArg_ref]], i64 16, i1 false) +// +// Kernel body call. +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[NNSK_as_cast]]) diff --git a/clang/test/CodeGenSYCL/no-opaque-generated-types-initialization.cpp b/clang/test/CodeGenSYCL/no-opaque-generated-types-initialization.cpp new file mode 100644 index 0000000000000..323688db8dc45 --- /dev/null +++ b/clang/test/CodeGenSYCL/no-opaque-generated-types-initialization.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s + +// This test checks that compiler generates correct code when kernel arguments +// are structs that contain pointers but not decomposed. + +#include "sycl.hpp" + +struct A { + float *F; +}; + +struct B { + int *F1; + A F3; + B(int *I, A AA) : F1(I), F3(AA) {}; +}; + +struct Nested { + typedef B TDA; +}; + +int main() { + sycl::queue q; + B Obj{nullptr, {nullptr}}; + + q.submit([&](sycl::handler &h) { + h.single_task( + [=]() { + (void)Obj; + }); + }); + + Nested::TDA NNSObj{nullptr, {nullptr}}; + q.submit([&](sycl::handler &h) { + h.single_task([=]() { + (void)NNSObj; + }); + }); + return 0; +} +// CHECK: define dso_local spir_kernel void @{{.*}}basic(%struct.__generated_B* noundef byval(%struct.__generated_B) align 8 %_arg_Obj) +// +// Kernel object clone. +// CHECK: %[[K:[a-zA-Z0-9_.]+]] = alloca %class.anon +// CHECK: %[[K_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* %[[K]] to %class.anon addrspace(4)* +// +// Argument reference. +// CHECK: %[[Arg_ref:[a-zA-Z0-9_.]+]] = addrspacecast %struct.__generated_B* %_arg_Obj to %struct.__generated_B addrspace(4)* + +// Initialization. +// CHECK: %[[GEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %[[K_as_cast]], i32 0, i32 0 +// CHECK: %[[ArgBC:[a-zA-Z0-9_.]+]] = bitcast %struct.__generated_B addrspace(4)* %[[Arg_ref]] to %struct.B addrspace(4)* +// CHECK: %[[GEPBC:[a-zA-Z0-9_.]+]] = bitcast %struct.B addrspace(4)* %[[GEP]] to i8 addrspace(4)* +// CHECK: %[[ArgBC2:[a-zA-Z0-9_.]+]] = bitcast %struct.B addrspace(4)* %[[ArgBC]] to i8 addrspace(4)* +// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[GEPBC]], i8 addrspace(4)* align 8 %[[ArgBC2]], i64 16, i1 false) +// +// Kernel body call. +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(%class.anon addrspace(4)* noundef align 8 dereferenceable_or_null(16) %[[K_as_cast]]) + +// CHECK: define dso_local spir_kernel void @{{.*}}nns(%struct.__generated_B.0* noundef byval(%struct.__generated_B.0) align 8 %_arg_NNSObj) +// +// Kernel object clone. +// CHECK: %[[NNSK:[a-zA-Z0-9_.]+]] = alloca %class.anon.2 +// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast %class.anon.2* %[[NNSK]] to %class.anon.2 addrspace(4)* +// +// Argument reference. +// CHECK: %[[NNSArg_ref:[a-zA-Z0-9_.]+]] = addrspacecast %struct.__generated_B.0* %_arg_NNSObj to %struct.__generated_B.0 addrspace(4)* +// +// Initialization. +// CHECK: %[[NNSGEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds %class.anon.2, %class.anon.2 addrspace(4)* %[[NNSK_as_cast]], i32 0, i32 0 +// CHECK: %[[NNSArgBC:[a-zA-Z0-9_.]+]] = bitcast %struct.__generated_B.0 addrspace(4)* %[[NNSArg_ref]] to %struct.B addrspace(4)* +// CHECK: %[[NNSGEPBC:[a-zA-Z0-9_.]+]] = bitcast %struct.B addrspace(4)* %[[NNSGEP]] to i8 addrspace(4)* +// CHECK: %[[NNSArgBC2:[a-zA-Z0-9_.]+]] = bitcast %struct.B addrspace(4)* %[[NNSArgBC]] to i8 addrspace(4)* +// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[NNSGEPBC]], i8 addrspace(4)* align 8 %[[NNSArgBC2]], i64 16, i1 false) +// +// Kernel body call. +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv(%class.anon.2 addrspace(4)* noundef align 8 dereferenceable_or_null(16) %[[NNSK_as_cast]]) diff --git a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp index d1df3c4467efe..aa603877a7b88 100644 --- a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp +++ b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -23,6 +23,15 @@ void test(const int some_const) { }); } +struct test_struct_simple { + int data; + int *ptr; +}; + +struct Nested { +typedef test_struct_simple TDS; +}; + int main() { int data = 5; int* data_addr = &data; @@ -54,6 +63,15 @@ int main() { }); }); + Nested::TDS tds; + deviceQueue.submit([&](sycl::handler &h) { + h.single_task( + [=]() { + test_struct_simple k_s; + k_s = tds; + }); + }); + const int some_const = 10; test(some_const); return 0; @@ -162,3 +180,15 @@ int main() { // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int *' // CHECK-NEXT: ImplicitCastExpr {{.*}} '__global int *' // CHECK-NEXT: DeclRefExpr {{.*}} '__global int *' lvalue ParmVar {{.*}} '_arg_ptr_array' '__global int *' + +// CHECK: FunctionDecl {{.*}}kernel_nns 'void (__generated_test_struct_simple)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_tds '__generated_test_struct_simple' + +// CHECK: VarDecl {{.*}} used __SYCLKernel +// CHECK: InitListExpr +// CHECK: CXXConstructExpr {{.*}} 'Nested::TDS':'test_struct_simple' 'void (const test_struct_simple &) noexcept' +// CHECK: ImplicitCastExpr {{.*}} 'const test_struct_simple' lvalue +// CHECK: UnaryOperator {{.*}} 'Nested::TDS':'test_struct_simple' lvalue prefix '*' cannot overflow +// CHECK: CXXReinterpretCastExpr {{.*}} 'Nested::TDS *' reinterpret_cast +// CHECK: UnaryOperator {{.*}} '__generated_test_struct_simple *' prefix '&' cannot overflow +// CHECK: DeclRefExpr {{.*}} '__generated_test_struct_simple' lvalue ParmVar {{.*}} '_arg_tds' '__generated_test_struct_simple' diff --git a/clang/test/SemaSYCL/decomposition.cpp b/clang/test/SemaSYCL/decomposition.cpp index 808f9bcb79a8b..ad3ba635d9e37 100644 --- a/clang/test/SemaSYCL/decomposition.cpp +++ b/clang/test/SemaSYCL/decomposition.cpp @@ -53,6 +53,10 @@ struct StructWithPtr { int i; }; +struct Nested { +typedef StructWithPtr TDStrWithPTR; +}; + struct NonTrivialType { int *Ptr; int i; @@ -179,6 +183,12 @@ int main() { }); // CHECK: FunctionDecl {{.*}}Pointer{{.*}} 'void (__generated_StructWithPtr)' + Nested::TDStrWithPTR TDStructWithPtr; + myQueue.submit([&](sycl::handler &h) { + h.single_task([=]() { return TDStructWithPtr.i; }); + }); + // CHECK: FunctionDecl {{.*}}TDStr{{.*}} 'void (__generated_StructWithPtr)' + // FIXME: Stop decomposition of arrays with pointers StructWithArray t1; myQueue.submit([&](sycl::handler &h) {