Skip to content

Commit

Permalink
[SYCL] Redesign pointer handling for OpenCL kernel generation (#6728)
Browse files Browse the repository at this point in the history
Requirement - Do not decompose types with pointers when generating
OpenCL kernel arguments.

This PR adds logic to stop decomposing trivial types containing
pointers. For every SYCL kernel argument which is a record type
containing a pointer (or has a field or a base class with a pointer), we
generate a new record type with all pointers in __global address space.
This compiler-generated type is the OpenCL kernel argument. In the
kernel body, we initialize the local clone via memcpy.

Limitations:

1. Arrays of pointers or arrays of types with pointers are still
decomposed into their elements.
2. Due to current implementation restrictions, types that are not
default constructible continue to trigger decomposition if they contain
pointers.

Both limitations above will hopefully be fixed in follow-up PRs.

Signed-off-by: Elizabeth Andrews <elizabeth.andrews@intel.com>
  • Loading branch information
elizabethandrews committed Sep 27, 2022
1 parent 60c634c commit 3916d3b
Show file tree
Hide file tree
Showing 12 changed files with 928 additions and 154 deletions.
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,14 @@ def SYCLRequiresDecomposition : InheritableAttr {
let Documentation = [InternalOnly];
}

// Internal attribute record with an empty spelling list, so there is no
// source-level syntax for it.
// NOTE(review): judging by the name and the accompanying commit message, this
// presumably marks declarations for which a compiler-generated type is needed
// as the OpenCL kernel argument -- confirm against the uses in SemaSYCL.cpp.
def SYCLGenerateNewType : InheritableAttr {
// No spellings, as this is for internal use.
let Spellings = [];
// The attribute's subject list is restricted to named declarations.
let Subjects = SubjectList<[Named]>;
// Tied to the SYCLIsDevice and SYCLIsHost language options.
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// Documented as internal-only.
let Documentation = [InternalOnly];
}

def SYCLIntelKernelArgsRestrict : InheritableAttr {
let Spellings = [CXX11<"intel", "kernel_args_restrict">];
let Subjects = SubjectList<[Function], ErrorDiag>;
Expand Down
455 changes: 414 additions & 41 deletions clang/lib/Sema/SemaSYCL.cpp

Large diffs are not rendered by default.

45 changes: 26 additions & 19 deletions clang/test/CodeGenSYCL/inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,38 @@ int main() {
return 0;
}

// CHECK: %struct.base = type { i32, %class.InnerField }
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
// CHECK: %class.InnerFieldBase = type { i32 }
// CHECK: %class.__generated_second_base = type { ptr addrspace(1) }
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
// CHECK: %class.second_base = type { ptr addrspace(4) }

// Check kernel paramters
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(ptr noundef byval(%struct.base) align 4 %_arg__base, ptr noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
// CHECK-SAME: ptr noundef byval(%struct.base) align 4 %_arg__base
// CHECK-SAME: ptr noundef byval(%class.__generated_second_base) align 8 %_arg__base1
// CHECK-SAME: i32 noundef %_arg_a

// Check alloca for kernel paramters
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// Check alloca for local functor object
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_AA]] to ptr addrspace(4)
// CHECK: %[[BASE_TO_PTR:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT]] to ptr addrspace(4)
// Check allocas for kernel parameters and local functor object
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[ARG_A_ALLOCA]] to ptr addrspace(4)
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECT_ALLOCA]] to ptr addrspace(4)
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base to ptr addrspace(4)
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__base1 to ptr addrspace(4)
// CHECK: store i32 %_arg_a, ptr addrspace(4) %[[ARG_A]], align 4

// Initialize 'base' subobject
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[BASE_TO_PTR]], ptr addrspace(4) align 4 %_arg__base.ascast, i64 12, i1 false)

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i64 16
// Initialize 'second_base::e'
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, ptr addrspace(4) %[[OFFSET_CALC]], i32 0, i32 0
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, ptr addrspace(4) %_arg_e.ascast, i32 0, i32 0
// CHECK: %[[LOAD_PTR:.*]] = load ptr addrspace(1), ptr addrspace(4) %[[PTR_TO_WRAPPER]]
// CHECK: %[[AS_CAST:.*]] = addrspacecast ptr addrspace(1) %[[LOAD_PTR]] to ptr addrspace(4)
// CHECK: store ptr addrspace(4) %[[AS_CAST]], ptr addrspace(4) %[[SECOND_BASE_PTR]]
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[LOCAL_OBJECT]], ptr addrspace(4) align 4 %[[ARG_BASE]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, ptr addrspace(4) %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], ptr addrspace(4) %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, ptr addrspace(4) %[[LOCAL_OBJECT]], i64 16
// Initialize 'second_base'
// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[OFFSET_CALC]], ptr addrspace(4) align 8 %[[ARG_BASE1]], i64 8, i1 false)
48 changes: 29 additions & 19 deletions clang/test/CodeGenSYCL/no_opaque_inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,45 @@ int main() {
return 0;
}

// CHECK: %struct.base = type { i32, %class.InnerField }
// CHECK: %class.InnerField = type { %class.InnerFieldBase, i32 }
// CHECK: %class.InnerFieldBase = type { i32 }
// CHECK: %class.__generated_second_base = type { i32 addrspace(1)* }
// CHECK: %struct.derived = type <{ %struct.base, [4 x i8], %class.second_base, i32, [4 x i8] }>
// CHECK: %class.second_base = type { i32 addrspace(4)* }

// Check kernel paramters
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived(%struct.base* noundef byval(%struct.base) align 4 %_arg__base, %struct.__wrapper_class* noundef byval(%struct.__wrapper_class) align 8 %_arg_e, i32 noundef %_arg_a)
// CHECK: define {{.*}}spir_kernel void @{{.*}}derived
// CHECK-SAME: %struct.base* noundef byval(%struct.base) align 4 %_arg__base
// CHECK-SAME: %class.__generated_second_base* noundef byval(%class.__generated_second_base) align 8 %_arg__base1
// CHECK-SAME: i32 noundef %_arg_a

// Check alloca for kernel paramters
// CHECK: %[[ARG_AA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// Check alloca for local functor object
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_AA]] to i32 addrspace(4)*
// Check allocas for kernel parameters and local functor object
// CHECK: %[[ARG_A_ALLOCA:[a-zA-Z0-9_.]+]] = alloca i32, align 4
// CHECK: %[[LOCAL_OBJECT_ALLOCA:[a-zA-Z0-9_.]+]] = alloca %struct.derived, align 8
// CHECK: %[[ARG_A:[a-zA-Z0-9_.]+]] = addrspacecast i32* %[[ARG_A_ALLOCA]] to i32 addrspace(4)*
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %struct.derived* %[[LOCAL_OBJECT_ALLOCA]] to %struct.derived addrspace(4)*
// CHECK: %[[ARG_BASE:[a-zA-Z0-9_.]+]] = addrspacecast %struct.base* %_arg__base to %struct.base addrspace(4)*
// CHECK: %[[ARG_BASE1:[a-zA-Z0-9_.]+]] = addrspacecast %class.__generated_second_base* %_arg__base1 to %class.__generated_second_base addrspace(4)*
// CHECK: store i32 %_arg_a, i32 addrspace(4)* %[[ARG_A]], align 4

// Initialize 'base' subobject
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to %struct.base addrspace(4)*
// CHECK: %[[DERIVED_TO_BASE:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to %struct.base addrspace(4)*
// CHECK: %[[BASE_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[DERIVED_TO_BASE]] to i8 addrspace(4)*
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %_arg__base.ascast to i8 addrspace(4)*
// CHECK: %[[PARAM_TO_PTR:.*]] = bitcast %struct.base addrspace(4)* %[[ARG_BASE]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[BASE_TO_PTR]], i8 addrspace(4)* align 4 %[[PARAM_TO_PTR]], i64 12, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]

// Initialize 'second_base' subobject
// First, derived-to-base cast with offset:
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast to i8 addrspace(4)*
// CHECK: %[[DERIVED_PTR:.*]] = bitcast %struct.derived addrspace(4)* %[[LOCAL_OBJECT]] to i8 addrspace(4)*
// CHECK: %[[OFFSET_CALC:.*]] = getelementptr inbounds i8, i8 addrspace(4)* %[[DERIVED_PTR]], i64 16
// CHECK: %[[TO_SECOND_BASE:.*]] = bitcast i8 addrspace(4)* %[[OFFSET_CALC]] to %class.second_base addrspace(4)*
// Initialize 'second_base::e'
// CHECK: %[[SECOND_BASE_PTR:.*]] = getelementptr inbounds %class.second_base, %class.second_base addrspace(4)* %[[TO_SECOND_BASE]], i32 0, i32 0
// CHECK: %[[PTR_TO_WRAPPER:.*]] = getelementptr inbounds %struct.__wrapper_class, %struct.__wrapper_class addrspace(4)* %_arg_e.ascast, i32 0, i32 0
// CHECK: %[[LOAD_PTR:.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %[[PTR_TO_WRAPPER]]
// CHECK: %[[AS_CAST:.*]] = addrspacecast i32 addrspace(1)* %[[LOAD_PTR]] to i32 addrspace(4)*
// CHECK: store i32 addrspace(4)* %[[AS_CAST]], i32 addrspace(4)* addrspace(4)* %[[SECOND_BASE_PTR]]
// CHECK: %[[SECOND_BASE_TO_PTR:.*]] = bitcast %class.second_base addrspace(4)* %[[TO_SECOND_BASE]] to i8 addrspace(4)*
// CHECK: %[[SECOND_PARAM_TO_PTR:.*]] = bitcast %class.__generated_second_base addrspace(4)* %[[ARG_BASE1]] to i8 addrspace(4)*
// CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %[[SECOND_BASE_TO_PTR]], i8 addrspace(4)* align 8 %[[SECOND_PARAM_TO_PTR]], i64 8, i1 false)

// Initialize field 'a'
// CHECK: %[[GEP_A:[a-zA-Z0-9]+]] = getelementptr inbounds %struct.derived, %struct.derived addrspace(4)* %[[LOCAL_OBJECT]].ascast, i32 0, i32 3
// CHECK: %[[LOAD_A:[0-9]+]] = load i32, i32 addrspace(4)* %[[ARG_A]], align 4
// CHECK: store i32 %[[LOAD_A]], i32 addrspace(4)* %[[GEP_A]]
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/no_opaque_pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ int main() {

// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { i32 addrspace(1)* }
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { float addrspace(1)* }
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
// CHECK-SAME: %[[WRAPPER_F1]]* noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
// CHECK-SAME: %[[WRAPPER_F2]]* noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
// CHECK-SAME: %[[WRAPPER_F]]* noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
// CHECK-SAME: %[[GENERATED_A]]* noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: %[[WRAPPER_F4_1]]* noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: %[[WRAPPER_F4_2]]* noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(%[[WRAPPER_LAMBDA_PTR]]* noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/pointers-in-structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ int main() {

// CHECK: %[[WRAPPER_F1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[GENERATED_A:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F4_1:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_F4_2:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: %[[WRAPPER_LAMBDA_PTR:[a-zA-Z0-9_.]+]] = type { ptr addrspace(1) }
// CHECK: define {{.*}}spir_kernel void @{{.*}}structs
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F1]]) align 8 %_arg_F1,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F2]]) align 8 %_arg_F2,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F]]) align 8 %_arg_F,
// CHECK-SAME: ptr noundef byval(%[[GENERATED_A]]) align 8 %_arg_F3,
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_1]]) align 8 %_arg_F4
// CHECK-SAME: ptr noundef byval(%[[WRAPPER_F4_2]]) align 8 %_arg_F41
// CHECK: define {{.*}}spir_kernel void @{{.*}}lambdas{{.*}}(ptr noundef byval(%[[WRAPPER_LAMBDA_PTR]]) align 8 %_arg_Ptr)
8 changes: 2 additions & 6 deletions clang/test/CodeGenSYCL/struct_kernel_param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@
// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 },
// FldInt, offset to 16 because the float* causes the alignment of the structs
// to change.
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
// FldArr
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 },
// FldFloat
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 32 },
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 12, 40 },
// MyStruct is not decomposed since it does not contain special types.
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 40, 16 },
// CHECK-EMPTY:
// CHECK-NEXT: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 },
// CHECK-NEXT:};
Expand Down
Loading

0 comments on commit 3916d3b

Please sign in to comment.