Skip to content

Commit d893994

Browse files
authored
[SYCL][Driver][thinLTO] Don't pass -emit-only-kernels-as-entry-points to sycl-post-link in early splitting (#14991)
In early splitting with thinLTO, we may be generating a `.o` file that will be linked with other files later. Right now, passing `-emit-only-kernels-as-entry-points` to `sycl-post-link` can cause functions to be dropped even though they are used by some other `.o` file. We will need to prune non-entry points as part of the thinLTO processing in `clang-linker-wrapper`, but that is not implemented yet. --------- Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
1 parent 121a7d5 commit d893994

File tree

2 files changed

+30
-13
lines changed

2 files changed

+30
-13
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10741,6 +10741,32 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1074110741
addArgs(PostLinkArgs, TCArgs, {"-support-dynamic-linking"});
1074210742
}
1074310743

10744+
// On Intel targets we don't need non-kernel functions as entry points,
10745+
// because it only increases the amount of code for the device compiler to
// handle,
10746+
// without any actual benefits.
10747+
// TODO: Try to extend this feature for non-Intel GPUs.
//
// Returns true when the caller should add
// "-emit-only-kernels-as-entry-points" to the sycl-post-link arguments.
// Each check below returns false for a configuration in which the flag is
// not added; if none of them applies the result is true.
10748+
static bool shouldEmitOnlyKernelsAsEntryPoints(const ToolChain &TC,
10749+
const llvm::opt::ArgList &TCArgs,
10750+
llvm::Triple Triple) {
10751+
// The fno_ form of the option wins over the positive form when it is the
// last one given; the default (neither given) is "not set".
if (TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs,
10752+
options::OPT_fsycl_remove_unused_external_funcs, false))
10753+
return false;
10754+
// SYCL native CPU toolchains are excluded.
if (isSYCLNativeCPU(TC))
10755+
return false;
10756+
// When supporting dynamic linking, non-kernels in a device image can be
10757+
// called.
10758+
if (supportDynamicLinking(TCArgs))
10759+
return false;
10760+
// Non-Intel GPU targets are excluded (see the TODO above).
if (Triple.isNVPTX() || Triple.isAMDGPU())
10761+
return false;
10762+
// Both queries ask about the device offload action, not the host one.
bool IsUsingLTO = TC.getDriver().isUsingLTO(/*IsDeviceOffloadAction=*/true);
10763+
auto LTOMode = TC.getDriver().getLTOMode(/*IsDeviceOffloadAction=*/true);
10764+
// With thinLTO, final entry point handling is done in clang-linker-wrapper.
10765+
if (IsUsingLTO && LTOMode == LTOK_Thin)
10766+
return false;
10767+
return true;
10768+
}
10769+
1074410770
// Add any sycl-post-link options that rely on a specific Triple in addition
1074510771
// to user supplied options. This function is invoked only for the old
1074610772
// offloading model. For the new offloading model, a slightly modified version
@@ -10778,17 +10804,7 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1077810804
(Triple.getArchName() != "spir64_fpga"))
1077910805
addArgs(PostLinkArgs, TCArgs, {"-split=auto"});
1078010806

10781-
// On Intel targets we don't need non-kernel functions as entry points,
10782-
// because it only increases amount of code for device compiler to handle,
10783-
// without any actual benefits.
10784-
// TODO: Try to extend this feature for non-Intel GPUs.
10785-
if ((!TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs,
10786-
options::OPT_fsycl_remove_unused_external_funcs,
10787-
false) &&
10788-
!isSYCLNativeCPU(TC)) &&
10789-
// When supporting dynamic linking, non-kernels in a device image can be
10790-
// called.
10791-
!supportDynamicLinking(TCArgs) && !Triple.isNVPTX() && !Triple.isAMDGPU())
10807+
if (shouldEmitOnlyKernelsAsEntryPoints(TC, TCArgs, Triple))
1079210808
addArgs(PostLinkArgs, TCArgs, {"-emit-only-kernels-as-entry-points"});
1079310809

1079410810
if (!Triple.isAMDGCN())

clang/test/Driver/sycl-lto.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
// CHECK_SPLIT_ERROR: '-fsycl-device-code-split=off' is not supported when '-foffload-lto=thin' is set with '-fsycl'
1010

1111
// Verify there's no error and we see the expected cc1 flags and tool invocations with the new offload driver.
12-
// RUN: %clangxx -fsycl --offload-new-driver -foffload-lto=thin %s -### 2>&1 | FileCheck -check-prefix=CHECK_SUPPORTED %s
12+
// RUN: %clangxx -fsycl --offload-new-driver -foffload-lto=thin %s -### 2>&1 | \
13+
// RUN: FileCheck -check-prefix=CHECK_SUPPORTED -implicit-check-not=-emit-only-kernels-as-entry-points %s
1314
// CHECK_SUPPORTED: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" {{.*}} "-flto=thin" "-flto-unit"
14-
// CHECK_SUPPORTED: sycl-post-link{{.*}}
15+
// CHECK_SUPPORTED: sycl-post-link
1516
// CHECK_SUPPORTED-NOT: -properties
1617
// CHECK_SUPPORTED-NEXT: file-table-tform{{.*}}
1718
// CHECK_SUPPORTED-NEXT: llvm-foreach{{.*}} "--" {{.*}}clang{{.*}} "-fsycl-is-device"{{.*}} "-flto=thin" "-flto-unit"

0 commit comments

Comments
 (0)