Skip to content

[SYCL] Redesign pointer handling for OpenCL kernel generation #6728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,14 @@ def SYCLRequiresDecomposition : InheritableAttr {
let Documentation = [InternalOnly];
}

def SYCLGenerateNewType : InheritableAttr {
// No spellings, as this is for internal use.
let Spellings = [];
let Subjects = SubjectList<[Named]>;
let LangOpts = [SYCLIsDevice, SYCLIsHost];
let Documentation = [InternalOnly];
}

def SYCLIntelKernelArgsRestrict : InheritableAttr {
let Spellings = [CXX11<"intel", "kernel_args_restrict">];
let Subjects = SubjectList<[Function], ErrorDiag>;
Expand Down
455 changes: 414 additions & 41 deletions clang/lib/Sema/SemaSYCL.cpp

Large diffs are not rendered by default.

45 changes: 26 additions & 19 deletions clang/test/CodeGenSYCL/inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,38 @@ int main() {
return 0;
}

// CHECK: %struct.base = type { i32, %class.InnerField }
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
// CHECK: %class.InnerFieldBase = type { i32 }
// CHECK: %class.__generated_second_base = type { ptr addrspace(1) }
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
// CHECK: %class.second_base = type { ptr addrspace(4) }

// Check kernel paramters
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(ptr noundef byval(%struct.base) align 4 %_arg__base, ptr noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
// CHECK-SAME: ptr noundef byval(%struct.base) align 4 %_arg__base
// CHECK-SAME: ptr noundef byval(%class.__generated_second_base) align 8 %_arg__base1
// CHECK-SAME: i32 noundef %_arg_a

// Check alloca for kernel paramters
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// Check alloca for local functor object
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_AA]] to ptr addrspace(4)
// CHECK: %[[BASE_TO_PTR:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT]] to ptr addrspace(4)
// Check allocas for kernel parameters and local functor object
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_A_ALLOCA]] to ptr addrspace(4)
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT_ALLOCA]] to ptr addrspace(4)
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base to ptr addrspace(4)
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base1 to ptr addrspace(4)
// CHECK: store i32 %_arg_a, ptr addrspace(4) %[[ARG_A]], align 4

// Initialize 'base' subobject
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[BASE_TO_PTR]], ptr addrspace(4) align 4 %_arg__base.ascast, i64 12, i1 false)

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i64 16
// Initialize 'second_base::e'
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, ptr addrspace(4) %[[OFFSET_CALC]], i32 0, i32 0
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, ptr addrspace(4) %_arg_e.ascast, i32 0, i32 0
// CHECK: %[[LOAD_PTR:.*]] = load ptr addrspace(1), ptr addrspace(4) %[[PTR_TO_WRAPPER]]
// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr addrspace(1) %[[LOAD_PTR]] to ptr addrspace(4)
// CHECK: store ptr addrspace(4) %[[AS_CAST]], ptr addrspace(4) %[[SECOND_BASE_PTR]]
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[LOCAL_OBJECT]], ptr addrspace(4) align 4 %[[ARG_BASE]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, ptr addrspace(4) %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], ptr addrspace(4) %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]], i64 16
// Initialize 'second_base'
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[OFFSET_CALC]], ptr addrspace(4) align 8 %[[ARG_BASE1]], i64 8, i1 false)
48 changes: 29 additions & 19 deletions clang/test/CodeGenSYCL/no_opaque_inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,45 @@ int main() {
return 0;
}

// CHECK: %struct.base = type { i32, %class.InnerField }
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
// CHECK: %class.InnerFieldBase = type { i32 }
// CHECK: %class.__generated_second_base = type { i32 addrspace(1)* }
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
// CHECK: %class.second_base = type { i32 addrspace(4)* }

// Check kernel paramters
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(%struct.base* noundef byval(%struct.base) align 4 %_arg__base, %struct.__wrapper_class* noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
// CHECK-SAME: %struct.base* noundef byval(%struct.base) align 4 %_arg__base
// CHECK-SAME: %class.__generated_second_base* noundef byval(%class.__generated_second_base) align 8 %_arg__base1
// CHECK-SAME: i32 noundef %_arg_a

// Check alloca for kernel paramters
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// Check alloca for local functor object
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_AA]] to i32 addrspace(4)*
// Check allocas for kernel parameters and local functor object
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_A_ALLOCA]] to i32 addrspace(4)*
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %struct.derived* %[[LOCAL_OBJECT_ALLOCA]] to %struct.derived addrspace(4)*
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast %struct.base* %_arg__base to %struct.base addrspace(4)*
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast %class.__generated_second_base* %_arg__base1 to %class.__generated_second_base addrspace(4)*
// CHECK: store i32 %_arg_a, i32 addrspace(4)* %[[ARG_A]], align 4

// Initialize 'base' subobject
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to %struct.base addrspace(4)*
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to %struct.base addrspace(4)*
// CHECK: %[[BASE_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[DERIVED_TO_BASE]] to i8 addrspace(4)*
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %_arg__base.ascast to i8 addrspace(4)*
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[ARG_BASE]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[BASE_TO_PTR]], i8 addrspace(4)* align 4 %[[PARAM_TO_PTR]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to i8 addrspace(4)*
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to i8 addrspace(4)*
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, i8 addrspace(4)* %[[DERIVED_PTR]], i64 16
// CHECK: %[[TO_SECOND_BASE:.*]] = bitcast i8 addrspace(4)* %[[OFFSET_CALC]] to %class.second_base addrspace(4)*
// Initialize 'second_base::e'
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, %class.second_base addrspace(4)* %[[TO_SECOND_BASE]], i32 0, i32 0
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, %struct.__wrapper_class addrspace(4)* %_arg_e.ascast, i32 0, i32 0
// CHECK: %[[LOAD_PTR:.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %[[PTR_TO_WRAPPER]]
// CHECK: %[[AS_CAST:.*]] = addrspacecast i32 addrspace(1)* %[[LOAD_PTR]] to i32 addrspace(4)*
// CHECK: store i32 addrspace(4)* %[[AS_CAST]], i32 addrspace(4)* addrspace(4)* %[[SECOND_BASE_PTR]]
// CHECK: %[[SECOND_BASE_TO_PTR:.*]] = bitcast %class.second_base addrspace(4)* %[[TO_SECOND_BASE]] to i8 addrspace(4)*
// CHECK: %[[SECOND_PARAM_TO_PTR:.*]] = bitcast %class.__generated_second_base addrspace(4)* %[[ARG_BASE1]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[SECOND_BASE_TO_PTR]], i8 addrspace(4)* align 8 %[[SECOND_PARAM_TO_PTR]], i64 8, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/no_opaque_pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ int main() {

// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
// CHECK-SAME: %[[WRAPPER_F1]]* noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
// CHECK-SAME: %[[WRAPPER_F2]]* noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
// CHECK-SAME: %[[WRAPPER_F]]* noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
// CHECK-SAME: %[[GENERATED_A]]* noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: %[[WRAPPER_F4_1]]* noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: %[[WRAPPER_F4_2]]* noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(%[[WRAPPER_LAMBDA_PTR]]* noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ int main() {

// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
// CHECK-SAME: ptr noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(ptr noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
8 changes: 2 additions & 6 deletions clang/test/CodeGenSYCL/struct_kernel_param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@
// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 },
// FldInt, offset to 16 because the float* causes the alignment of the structs
// to change.
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
// FldArr
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 },
// FldFloat
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 32 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 12, 40 },
// MyStruct is not decomposed since it does not contain special types.
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 40, 16 },
// CHECK-EMPTY:
// CHECK-NEXT: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 },
// CHECK-NEXT:};
Expand Down
Loading