Skip to content

[SYCL][NFCI] Move default optimization level configuration #7885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4994,6 +4994,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-Wno-sycl-strict");
}

// Default to -O2 when no -O<level> option was given on the command line.
if (!Args.getLastArg(options::OPT_O_Group))
CmdArgs.push_back("-O2");

// Add the integration header option to generate the header.
StringRef Header(D.getIntegrationHeader(Input.getBaseInput()));
if (!Header.empty()) {
Expand Down
7 changes: 3 additions & 4 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,9 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
static unsigned getOptimizationLevel(ArgList &Args, InputKind IK,
DiagnosticsEngine &Diags) {
unsigned DefaultOpt = llvm::CodeGenOpt::None;
if (((IK.getLanguage() == Language::OpenCL ||
IK.getLanguage() == Language::OpenCLCXX) &&
!Args.hasArg(OPT_cl_opt_disable)) ||
Args.hasArg(OPT_fsycl_is_device))
if ((IK.getLanguage() == Language::OpenCL ||
IK.getLanguage() == Language::OpenCLCXX) &&
!Args.hasArg(OPT_cl_opt_disable))
DefaultOpt = llvm::CodeGenOpt::Default;

if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGenSYCL/address-space-cond-op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,27 @@ struct S {
unsigned short x;
};

// CHECK-LABEL: @_Z3foobR1SS_(
// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_(
// CHECK: entry:
// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast ptr [[COND_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[LHS_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]]
// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]]
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1
// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast ptr [[RHS:%.*]] to ptr addrspace(4)
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]]
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi ptr addrspace(4) [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 %agg.result, ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}}
// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false)
// CHECK-NEXT: ret void
//
S foo(bool cond, S &lhs, S rhs) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenSYCL/const-wg-init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ int main() {
const int WG_CONST = 10;
});
// CHECK: store i32 10, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4))
// CHECK: %{{[0-9]+}} = call ptr @llvm.invariant.start.p4(i64 4, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4)))
return 0;
}
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/functionptr-addrspace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
kernelFunc();
}

// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr nocapture noundef %fptr, ptr addrspace(4) nocapture noundef %ptr)
// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr noundef %fptr, ptr addrspace(4) noundef %ptr)
void invoke_function(int (*fptr)(), int *ptr) {}

int f() { return 0; }
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/group-local-memory.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device
// compilation pipeline with the inliner pass (new Pass Manager).

// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm \
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \
// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/inline_asm.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s
// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s

class kernel;

Expand Down
4 changes: 3 additions & 1 deletion clang/test/CodeGenSYCL/inlining.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-O0

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
kernelFunc();
}

int main() {
// CHECK-O0: noinline
// CHECK-NOT: noinline
kernel_single_task<class kernel_function>([]() {});
return 0;
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O2 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s

// CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]]
Expand Down
8 changes: 1 addition & 7 deletions clang/test/CodeGenSYCL/max-concurrency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,22 @@
// CHECK: %inc = add nsw i32 [[TMP2]], 1
// CHECK: store i32 %inc, ptr addrspace(4) %i.ascast, align 4
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: store i32 %inc10, ptr addrspace(4) %i1.ascast, align 4
// CHECK: store i32 %inc8, ptr addrspace(4) %i1.ascast, align 4
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]]
// CHECK: ret void

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#[0-9]+]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
// CHECK: entry:
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4)
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]])
// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]])
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]])
// CHECK: ret void

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
// CHECK: entry
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4)
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]])
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]])
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]]
// CHECK: ret void

// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
Expand All @@ -49,9 +45,7 @@
// CHECK: entry:
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4)
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]])
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]])
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]])
// CHECK: ret void

// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ void foo() {

// Store the int and the float into the struct created
// CHECK: %x = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 0
// CHECK: %1 = load i32, i32 addrspace(4)* %_arg_x.addr
// CHECK: store i32 %1, i32 addrspace(4)* %x
// CHECK: %0 = load i32, i32 addrspace(4)* %_arg_x.addr
// CHECK: store i32 %0, i32 addrspace(4)* %x
// CHECK: %f2 = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 1
// CHECK: %2 = load float, float addrspace(4)* %_arg_f2.addr
// CHECK: store float %2, float addrspace(4)* %f2
// CHECK: %1 = load float, float addrspace(4)* %_arg_f2.addr
// CHECK: store float %1, float addrspace(4)* %f2

// Call the lambda
// CHECK: call spir_func void @{{.*}}foo{{.*}}(%class.anon addrspace(4)* {{.*}} %__SYCLKernel{{.*}})
Expand Down
14 changes: 10 additions & 4 deletions clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@

// CHECK: define{{.*}} void @_Z3fooDB4096_S_(i4096 addrspace(4)* {{.*}} sret(i4096) align 8 %agg.result, i4096* {{.*}} byval(i4096) align 8 %[[ARG1:[0-9]+]], i4096* {{.*}} byval(i4096) align 8 %[[ARG2:[0-9]+]])
signed _BitInt(4096) foo(signed _BitInt(4096) a, signed _BitInt(4096) b) {
// CHECK: %[[VAR_A:a]] = load i4096, i4096* %[[ARG1]], align 8
// CHECK: %[[VAR_B:b]] = load i4096, i4096* %[[ARG2]], align 8
// CHECK: %[[RES:div]] = sdiv i4096 %[[VAR_A]], %[[VAR_B]]
// CHECK: store i4096 %[[RES]], i4096 addrspace(4)* %agg.result, align 8
// CHECK: %a.addr.ascast = addrspacecast i4096* %a.addr to i4096 addrspace(4)*
// CHECK: %b.addr.ascast = addrspacecast i4096* %b.addr to i4096 addrspace(4)*
// CHECK: %a = load i4096, i4096* %[[ARG1]], align 8
// CHECK: %b = load i4096, i4096* %[[ARG2]], align 8
// CHECK: store i4096 %a, i4096 addrspace(4)* %a.addr.ascast, align 8
// CHECK: store i4096 %b, i4096 addrspace(4)* %b.addr.ascast, align 8
// CHECK: %2 = load i4096, i4096 addrspace(4)* %a.addr.ascast, align 8
// CHECK: %3 = load i4096, i4096 addrspace(4)* %b.addr.ascast, align 8
// CHECK: %div = sdiv i4096 %2, %3
// CHECK: store i4096 %div, i4096 addrspace(4)* %agg.result, align 8
// CHECK: ret void
return a / b;
}
Expand Down
24 changes: 12 additions & 12 deletions clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,29 @@ struct S {
unsigned short x;
};

// CHECK-LABEL: @_Z3foobR1SS_(
// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_(
// CHECK: entry:
// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT__ZTS1S_S:%.*]] addrspace(4)*, align 8
// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT_S:%.*]] addrspace(4)*, align 8
// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast i8* [[COND_ADDR]] to i8 addrspace(4)*
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT__ZTS1S_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT_S]] addrspace(4)* addrspace(4)*
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8
// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]]
// CHECK-NEXT: store [[STRUCT__ZTS1S_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]]
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT__ZTS1S_S]] addrspace(4)*
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]]
// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1
// CHECK-NEXT: store [[STRUCT_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT_S]] addrspace(4)*
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT__ZTS1S_S]] addrspace(4)*, [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]]
// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_S]] addrspace(4)*, [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT__ZTS1S_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)*
// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)*
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}}
// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)*
// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)*
// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false)
// CHECK-NEXT: ret void
//
S foo(bool cond, S &lhs, S rhs) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -O2 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s

// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
Expand Down
1 change: 0 additions & 1 deletion clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ int main() {
const int WG_CONST = 10;
});
// CHECK: store i32 10, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i32 addrspace(4)*)
// CHECK: %{{[0-9]+}} = call {}* @llvm.invariant.start.p4i8(i64 4, i8 addrspace(4)* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i8 addrspace(3)*) to i8 addrspace(4)*))

return 0;
}
11 changes: 8 additions & 3 deletions clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,20 @@ class kernel;
template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
// CHECK: %[[ARRAY_A:[0-9a-z]+]] = alloca [100 x i32], align 4
// CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[ARRAY_A]], i64 0, i64 0
// CHECK: %[[IDX4:.*]] = addrspacecast i32* %[[IDX]] to i32 addrspace(4)*
// CHECK: %[[I:[0-9a-z]+]] = alloca i32, align 4
// CHECK: %[[ARRAY_A]].ascast = addrspacecast [100 x i32]* %[[ARRAY_A]] to [100 x i32] addrspace(4)*
// CHECK: %[[I]].ascast = addrspacecast i32* %[[I]] to i32 addrspace(4)*
// CHECK: store i32 0, i32 addrspace(4)* %[[I]].ascast, align 4
// CHECK: %0 = load i32, i32 addrspace(4)* %[[I]].ascast, align 4
// CHECK: %[[IDXPROM:[0-9a-z]+]] = sext i32 %0 to i64
// CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(4)* %[[ARRAY_A]].ascast, i64 0, i64 %[[IDXPROM]]
int a[100], i = 0;
// CHECK-NEXT: call void asm sideeffect
// CHECK: ".decl V52 v_type=G type=d num_elts=16 align=GRF
// CHECK: svm_gather.4.1 (M1, 16) $0.0 V52.0
// CHECK: add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1
// CHECK: svm_scatter.4.1 (M1, 16) $0.0 V52.0",
// CHECK: "rw"(i32 addrspace(4)* %[[IDX4]])
// CHECK: "rw"(i32 addrspace(4)* %[[IDX]])
// TODO: nonnull attribute missing?
asm volatile(".decl V52 v_type=G type=d num_elts=16 align=GRF\n"
"svm_gather.4.1 (M1, 16) %0.0 V52.0\n"
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

Expand Down
2 changes: 0 additions & 2 deletions clang/test/CodeGenSYCL/no_opaque_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8
// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)*
// CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
// CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8*
// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4
// CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0
// CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
// CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]])
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ class Foo {
int main() {
q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]]
h.single_task<class test_kernel1>(
FuncObj());

// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]]
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]]
Foo f;
h.single_task<class test_kernel2>(f);

Expand All @@ -47,7 +47,7 @@ int main() {
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
// CHECK-NOT: !stall_enable
// CHECK-SAME: {
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2{{.*}} !stall_enable ![[NUM4]]
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 align 2{{.*}} !stall_enable ![[NUM4]]
h.single_task<class test_kernel4>(
[]() { func1(); });

Expand Down
Loading