Skip to content

Commit 5c8b7e7

Browse files
authored
[SYCL] Add -fsycl-fp32-prec-sqrt flag (#5309)
This flag enables correctly rounded `sycl::sqrt` (the default precision requirement is 3 ULP). And enables the flag for CUDA and HIP targets.
1 parent 008519a commit 5c8b7e7

File tree

17 files changed

+130
-24
lines changed

17 files changed

+130
-24
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enable
182182
CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
183183
CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
184184
CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt
185+
CODEGENOPT(SYCLFp32PrecSqrt, 1, 0) ///< -fsycl-fp32-prec-sqrt
185186
CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
186187
CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.
187188

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4750,6 +4750,9 @@ def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group<s
47504750
Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of "
47514751
"device libraries from device binary linkage. Valid arguments "
47524752
"are libc, libm-fp32, libm-fp64, all">;
4753+
def fsycl_fp32_prec_sqrt : Flag<["-"], "fsycl-fp32-prec-sqrt">, Group<sycl_Group>, Flags<[CC1Option]>,
4754+
HelpText<"SYCL only. Specify that single precision floating-point sqrt is correctly rounded.">,
4755+
MarshallingInfoFlag<CodeGenOpts<"SYCLFp32PrecSqrt">>;
47534756

47544757
//===----------------------------------------------------------------------===//
47554758
// FLangOption + CoreOption + NoXarchOption

clang/include/clang/Driver/ToolChain.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,8 @@ class ToolChain {
708708

709709
/// Get paths of HIP device libraries.
710710
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
711-
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const;
711+
getHIPDeviceLibs(const llvm::opt::ArgList &Args,
712+
const Action::OffloadKind DeviceOffloadingKind) const;
712713

713714
/// Return sanitizers which are available in this toolchain.
714715
virtual SanitizerMask getSupportedSanitizers() const;

clang/lib/Driver/ToolChain.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1119,7 +1119,9 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
11191119
ArgStringList &CC1Args) const {}
11201120

11211121
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
1122-
ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const {
1122+
ToolChain::getHIPDeviceLibs(
1123+
const ArgList &DriverArgs,
1124+
const Action::OffloadKind DeviceOffloadingKind) const {
11231125
return {};
11241126
}
11251127

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -895,9 +895,9 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
895895
return false;
896896
}
897897

898-
llvm::SmallVector<std::string, 12>
899-
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
900-
const std::string &GPUArch) const {
898+
llvm::SmallVector<std::string, 12> ROCMToolChain::getCommonDeviceLibNames(
899+
const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
900+
const Action::OffloadKind DeviceOffloadingKind) const {
901901
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
902902
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
903903

@@ -920,9 +920,15 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
920920
options::OPT_fno_unsafe_math_optimizations, false);
921921
bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
922922
options::OPT_fno_fast_math, false);
923-
bool CorrectSqrt = DriverArgs.hasFlag(
924-
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
925-
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
923+
bool CorrectSqrt = false;
924+
if (DeviceOffloadingKind == Action::OFK_SYCL) {
925+
// When using SYCL, sqrt is only correctly rounded if the flag is specified
926+
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt);
927+
} else
928+
CorrectSqrt = DriverArgs.hasFlag(
929+
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
930+
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
931+
926932
bool Wave64 = isWave64(DriverArgs, Kind);
927933

928934
return RocmInstallation.getCommonBitcodeLibs(

clang/lib/Driver/ToolChains/AMDGPU.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
142142
// Returns a list of device library names shared by different languages
143143
llvm::SmallVector<std::string, 12>
144144
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
145-
const std::string &GPUArch) const;
145+
const std::string &GPUArch,
146+
const Action::OffloadKind DeviceOffloadingKind) const;
146147
};
147148

148149
} // end namespace toolchains

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
124124
// - write an opt pass that sets that on every function it sees and pipe
125125
// the device-libs bitcode through that on the way to this llvm-link
126126
SmallVector<std::string, 12> BCLibs =
127-
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
127+
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str(),
128+
Action::OFK_OpenMP);
128129
llvm::for_each(BCLibs, [&](StringRef BCFile) {
129130
CmdArgs.push_back(Args.MakeArgString(BCFile));
130131
});

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,10 @@ void CudaToolChain::addClangTargetOptions(
710710
if (DeviceOffloadingKind == Action::OFK_SYCL) {
711711
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
712712
CC1Args);
713+
714+
if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) {
715+
CC1Args.push_back("-fcuda-prec-sqrt");
716+
}
713717
}
714718

715719
auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv,

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,11 +260,12 @@ void HIPAMDToolChain::addClangTargetOptions(
260260
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
261261
}
262262

263-
llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) {
264-
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
265-
: "-mlink-bitcode-file");
266-
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
267-
});
263+
llvm::for_each(
264+
getHIPDeviceLibs(DriverArgs, DeviceOffloadingKind), [&](auto BCFile) {
265+
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
266+
: "-mlink-bitcode-file");
267+
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
268+
});
268269
}
269270

270271
llvm::opt::DerivedArgList *
@@ -359,7 +360,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
359360
}
360361

361362
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
362-
HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
363+
HIPAMDToolChain::getHIPDeviceLibs(
364+
const llvm::opt::ArgList &DriverArgs,
365+
const Action::OffloadKind DeviceOffloadingKind) const {
363366
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
364367
if (DriverArgs.hasArg(options::OPT_nogpulib))
365368
return {};
@@ -416,7 +419,8 @@ HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
416419
BCLibs.push_back(RocmInstallation.getHIPPath());
417420

418421
// Add common device libraries like ocml etc.
419-
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
422+
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(),
423+
DeviceOffloadingKind))
420424
BCLibs.push_back(StringRef(N));
421425

422426
// Add instrument lib.

clang/lib/Driver/ToolChains/HIPAMD.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,9 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
8686
llvm::opt::ArgStringList &CC1Args) const override;
8787
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
8888
llvm::opt::ArgStringList &CC1Args) const override;
89-
llvm::SmallVector<BitCodeLibraryInfo, 12>
90-
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;
89+
llvm::SmallVector<BitCodeLibraryInfo, 12> getHIPDeviceLibs(
90+
const llvm::opt::ArgList &Args,
91+
const Action::OffloadKind DeviceOffloadingKind) const override;
9192

9293
SanitizerMask getSupportedSanitizers() const override;
9394

0 commit comments

Comments
 (0)