Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Experimental] Reduce the set of optimizations for SYCL device #1550

Merged
merged 6 commits into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[SYCL][Experimental] Reduce the set of optimizations for SYCL device
This patch limits the set of optimizations, aiming to reduce the size of
the generated device module.

Optimizations are currently disabled by default as they cause multiple
sorts of issues. Some of the issues are addressed within this patch, but
not all of them.

Optimizations can be enabled with `-fsycl-enable-optimizations` front-end
option (or `-Xclang -fsycl-enable-optimizations` driver option).

Signed-off-by: Alexey Bader <alexey.bader@intel.com>
  • Loading branch information
bader committed Apr 18, 2020
commit 32c3c2ab629ef4cefe2119775a88e960872c209f
4 changes: 2 additions & 2 deletions clang/lib/Basic/Targets/SPIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024");
"v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64");
}

void getTargetDefines(const LangOptions &Opts,
Expand All @@ -152,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
PtrDiffType = IntPtrType = TargetInfo::SignedLong;

resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024");
"v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64");
}

void getTargetDefines(const LangOptions &Opts,
Expand Down
56 changes: 37 additions & 19 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,19 +599,38 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
CodeGenOpts.PrepareForThinLTO));
}

PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;

PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
// Loop interleaving in the loop vectorizer has historically been set to be
// enabled when loop unrolling is enabled.
PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO;
PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
if (LangOpts.SYCLIsDevice) {
bader marked this conversation as resolved.
Show resolved Hide resolved
bader marked this conversation as resolved.
Show resolved Hide resolved
PMBuilder.OptLevel = 1;
PMBuilder.SizeLevel = 2;
PMBuilder.SLPVectorize = false;
PMBuilder.LoopVectorize = false;
PMBuilder.DivergentTarget = true;
PMBuilder.DisableGVNLoadPRE = true;
PMBuilder.ForgetAllSCEVInLoopUnroll = true;

PMBuilder.DisableUnrollLoops = true;
// Loop interleaving in the loop vectorizer has historically been set to be
// enabled when loop unrolling is enabled.
PMBuilder.LoopsInterleaved = false;
PMBuilder.MergeFunctions = false;
PMBuilder.PrepareForThinLTO = false;
PMBuilder.PrepareForLTO = false;
PMBuilder.RerollLoops = false;
} else {
PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;

PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
// Loop interleaving in the loop vectorizer has historically been set to be
// enabled when loop unrolling is enabled.
PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO;
PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
}

MPM.add(new TargetLibraryInfoWrapperPass(*TLII));

Expand Down Expand Up @@ -865,14 +884,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,

std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;

// Clean-up SYCL device code if LLVM passes are disabled
if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) {
PerModulePasses.add(createDeadCodeEliminationPass());
}
bader marked this conversation as resolved.
Show resolved Hide resolved

switch (Action) {
case Backend_EmitNothing:
break;

case Backend_EmitBC:
if (LangOpts.SYCLIsDevice) {
PerModulePasses.add(createDeadCodeEliminationPass());
}
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
Expand Down Expand Up @@ -1346,9 +1367,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;

case Backend_EmitBC:
if (LangOpts.SYCLIsDevice) {
CodeGenPasses.add(createDeadCodeEliminationPass());
}
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/target-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,11 @@

// RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"

// RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR64
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"

// RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=BPFEL
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/convergent.cl
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ void test_unroll() {

// The new PM produces a slightly different IR for the loop from the legacy PM,
// but the test still checks that the loop is not unrolled.
// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_cond_cleanup]], label %[[for_body]]
bader marked this conversation as resolved.
Show resolved Hide resolved
// CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]]
// CHECK-NEW: [[for_body_crit_edge]]:

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/address-space-swap.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang -fsycl-device-only -S -emit-llvm %s -o - | FileCheck %s
// RUN: %clang -fsycl-device-only -S -Xclang -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
#include <algorithm>

void test() {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang -fsycl-device-only %s -S -I %S/Inputs -emit-llvm -g -o - | FileCheck %s
// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -I %S/Inputs -g -o - | FileCheck %s
//
// Verify the SYCL kernel routine is marked artificial and has no source
// correlation.
Expand Down
20 changes: 13 additions & 7 deletions llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,12 @@ class SPIRVLowerMemmove : public ModulePass,
report_fatal_error("llvm.memmove of non-constant length not supported",
false);
auto *Length = cast<ConstantInt>(I.getLength());
if (isa<BitCastInst>(Src))
// The source could be bit-cast from another type,
// need the original type for the allocation of the temporary variable
SrcTy = cast<BitCastInst>(Src)->getOperand(0)->getType();
auto *S = Src;
// The source could be bit-cast or addrspacecast from another type,
// need the original type for the allocation of the temporary variable
while (isa<BitCastInst>(S) || isa<AddrSpaceCastInst>(S))
bader marked this conversation as resolved.
Show resolved Hide resolved
S = cast<CastInst>(S)->getOperand(0);
SrcTy = S->getType();
MaybeAlign Align = I.getSourceAlign();
auto Volatile = I.isVolatile();
Value *NumElements = nullptr;
Expand All @@ -87,9 +89,13 @@ class SPIRVLowerMemmove : public ModulePass,
NumElements = Builder.getInt32(SrcTy->getArrayNumElements());
ElementsCount = SrcTy->getArrayNumElements();
}
if (Mod->getDataLayout().getTypeSizeInBits(SrcTy->getPointerElementType()) *
ElementsCount !=
Length->getZExtValue() * 8)
if (((ElementsCount > 1) && (Mod->getDataLayout().getTypeSizeInBits(
SrcTy->getPointerElementType()) *
ElementsCount !=
Length->getZExtValue() * 8)) ||
((ElementsCount == 1) &&
(Mod->getDataLayout().getTypeSizeInBits(
SrcTy->getPointerElementType()) < Length->getZExtValue() * 8)))
report_fatal_error("Size of the memcpy should match the allocated memory",
false);

Expand Down
5 changes: 5 additions & 0 deletions llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,10 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V,
if (CallInst *CI = dyn_cast<CallInst>(V))
return mapValue(V, transCallInst(CI, BB));

// FIXME: this is not valid translation of freeze instruction
if (FreezeInst *FI = dyn_cast<FreezeInst>(V))
return mapValue(V, transValue(FI->getOperand(0), BB));

llvm_unreachable("Not implemented");
return nullptr;
}
Expand Down Expand Up @@ -1825,6 +1829,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II,
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::dbg_label:
case Intrinsic::assume:
return nullptr;
default:
if (SPIRVAllowUnknownIntrinsics)
Expand Down
46 changes: 41 additions & 5 deletions llvm-spirv/test/transcoding/llvm.memmove.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,46 @@
; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]]
; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]]

; CHECK-SPIRV: GenericCastToPtr {{[0-9]+}} [[out:[0-9]+]]
; CHECK-SPIRV: Variable {{[0-9]+}} [[mem:[0-9]+]] 7
; CHECK-SPIRV: Bitcast [[i8Ty:[0-9]+]] [[tmp0:[0-9]+]] [[mem]]
; CHECK-SPIRV: LifetimeStart [[tmp0]] [[size:[0-9]+]]
; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp1:[0-9]+]] [[mem]]
; CHECK-SPIRV: CopyMemorySized [[tmp1]] {{[0-9]+}} {{[0-9]+}}
; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp2:[0-9]+]] [[mem]]
; CHECK-SPIRV: CopyMemorySized [[out]] [[tmp2]] {{[0-9]+}}
; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]]
; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]]

; CHECK-LLVM-NOT: llvm.memmove

; CHECK-LLVM-LABEL: @test_struct
; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct
; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]]
; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]])
; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.memcpy
; CHECK-LLVM: ([[type]] align 64 [[tmp2]],
; CHECK-LLVM: {{i[0-9]+}} [[size:[0-9]+]]
; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32
; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]],
; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]]
; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.memcpy
; CHECK-LLVM: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]]
; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32
; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]]
; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]])

; CHECK-LLVM-LABEL: @copy_struct
; CHECK-LLVM: [[out:%[0-9]+]] = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)*
; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct
; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]]
; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]])
; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32
; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]],
; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]]
; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32
; CHECK-LLVM-SAME: align 64 [[out]]
; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]]
; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]]
; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]])

Expand All @@ -45,6 +73,14 @@ define spir_kernel void @test_struct(%struct.SomeStruct addrspace(1)* nocapture
ret void
}

; Test input: moves 68 bytes from %in (addrspace(1)) to %out (addrspace(4))
; via llvm.memmove. The destination reaches the intrinsic through a bitcast
; followed by an addrspacecast; this is the sequence the @copy_struct CHECK
; lines in this test match against.
define spir_func void @copy_struct(%struct.SomeStruct addrspace(1)* nocapture readonly %in, %struct.SomeStruct addrspace(4)* nocapture %out) {
; View the source struct as a raw i8 pointer, staying in addrspace(1).
%1 = bitcast %struct.SomeStruct addrspace(1)* %in to i8 addrspace(1)*
; View the destination struct as a raw i8 pointer, staying in addrspace(4).
%2 = bitcast %struct.SomeStruct addrspace(4)* %out to i8 addrspace(4)*
; Cast the destination from addrspace(4) to addrspace(1) so that both operands
; match the p1i8.p1i8 memmove signature used below.
%3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)*
; 68-byte move with both pointers marked align 64; the trailing `i1 false` is
; presumably the intrinsic's isvolatile flag (confirm against the LLVM LangRef).
call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* align 64 %3, i8 addrspace(1)* align 64 %1, i32 68, i1 false)
ret void
}

; Function Attrs: nounwind
declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1

Expand Down