-
Notifications
You must be signed in to change notification settings - Fork 790
Add support for fpbuiltin accuracy lookup #9167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
176243c
Add support for fpbuiltin accuracy lookup
13324ee
Minor cleanup
26d4036
More minor cleanup
e3a74ef
Convert fp-accuracy lookup to return a StringRef
780f298
Fixed function names to conform to coding standard
30f10e3
Address review feedback
c67f43b
More review feedback updates
c95a276
Removed unneeded include from unit test
687123d
Merge branch 'intel:sycl' into fp-accuracy-table
92d0a4e
clang-format fixes
0b78338
more clang-format fixes
7f9763c
One more clang-format fix
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
//===--- llvm/IR/FPAccuracy.def - Mappings for fp accuracy -----*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Defines properties of floating point builtin intrinsics. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef FP_ACCURACY | ||
#define FP_ACCURACY(IID,SF,SD,CF,CD) | ||
#endif | ||
|
||
// Each entry below maps an fpbuiltin intrinsic ID to the required accuracy | ||
// for that operation for single- and double-precision for SYCL and CUDA | ||
// | ||
// All accuracies are returned as single-precision floating-point values. | ||
// | ||
// Note: for single-precision fdiv and sqrt, the value returned here assumes | ||
// that options to require correctly rounded results | ||
// (-cl-fp32-correctly-rounded-divide-sqrt for SYCL, -prec-div=true or | ||
// -prec-sqrt=true for CUDA) are not used. If such options are used, these | ||
// operations require special handling elsewhere. | ||
// | ||
// FP_ACCURACY(<IID>, <SYCL_FLOAT>, <SYCL_DOUBLE>, <CUDA_FLOAT>, <CUDA_DOUBLE>) | ||
// | ||
FP_ACCURACY(fpbuiltin_fadd, "0.0f", "0.0f", "0.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_fsub, "0.0f", "0.0f", "0.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_fmul, "0.0f", "0.0f", "0.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_fdiv, "2.5f", "0.0f", "2.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_frem, "0.0f", "0.0f", "0.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_sin, "4.0f", "4.0f", "2.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_cos, "4.0f", "4.0f", "2.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_tan, "5.0f", "5.0f", "4.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_sinh, "4.0f", "4.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_cosh, "4.0f", "4.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_tanh, "5.0f", "5.0f", "2.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_asin, "4.0f", "4.0f", "4.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_acos, "4.0f", "4.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_atan, "5.0f", "5.0f", "2.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_atan2, "6.0f", "6.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_asinh, "4.0f", "4.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_acosh, "4.0f", "4.0f", "4.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_atanh, "5.0f", "5.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_exp, "3.0f", "3.0f", "2.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_exp2, "3.0f", "3.0f", "2.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_exp10, "3.0f", "3.0f", "2.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_expm1, "3.0f", "3.0f", "1.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_log, "3.0f", "3.0f", "1.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_log2, "3.0f", "3.0f", "1.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_log10, "3.0f", "3.0f", "2.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_log1p, "2.0f", "2.0f", "1.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_hypot, "4.0f", "4.0f", "3.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_pow, "16.0f", "16.0f", "8.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_ldexp, "0.0f", "0.0f", "0.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_sqrt, "2.5f", "0.0f", "2.0f", "0.0f") | ||
FP_ACCURACY(fpbuiltin_rsqrt, "2.0f", "2.0f", "2.0f", "1.0f") | ||
FP_ACCURACY(fpbuiltin_erf, "16.0f", "16.0f", "2.0f", "2.0f") | ||
FP_ACCURACY(fpbuiltin_erfc, "16.0f", "16.0f", "4.0f", "5.0f") | ||
FP_ACCURACY(fpbuiltin_sincos, "4.0f", "4.0f", "2.0f", "2.0f") | ||
|
||
#undef FP_ACCURACY |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
//===- llvm/IR/FPAccuracy.h -------------------------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// | ||
/// \file | ||
/// Interfaces related to floating-point accuracy control. | ||
/// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_IR_FPACCURACY_H | ||
#define LLVM_IR_FPACCURACY_H | ||
|
||
namespace llvm { | ||
|
||
class StringRef; | ||
class Type; | ||
|
||
namespace Intrinsic { | ||
typedef unsigned ID; | ||
} | ||
|
||
namespace fp { | ||
|
||
/// FP accuracy | ||
/// | ||
/// Enumerates supported accuracy modes for fpbuiltin intrinsics. These | ||
/// modes are used to look up the required accuracy, in terms of ULP, for known | ||
/// math operations that are represented by the fpbuiltin intrinsics. | ||
/// | ||
/// These values can also be used to set the default accuracy for an IRBuilder | ||
/// object and the IRBuilder will automatically attach the corresponding | ||
/// "fpbuiltin-max-error" attribute to any fpbuiltin intrinsics that are | ||
/// created using the IRBuilder object. | ||
/// | ||
enum class FPAccuracy { High, Medium, Low, SYCL, CUDA }; | ||
|
||
/// Returns the required accuracy, in terms of ULP, for an fpbuiltin intrinsic | ||
/// given the intrinsic ID, the base type for the operation, and the required | ||
/// accuracy level (as an enumerated identifier). The value is returned as a | ||
/// string containing a floating-point literal (for example "4.0f"). | ||
/// | ||
/// If the supplied intrinsic ID and type do not identify an operation for | ||
/// which required accuracy is available, this function returns an empty | ||
/// StringRef. | ||
StringRef getAccuracyForFPBuiltin(Intrinsic::ID, const Type *, FPAccuracy); | ||
|
||
} // namespace fp | ||
|
||
} // namespace llvm | ||
|
||
#endif // LLVM_IR_FPACCURACY_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
//===-- FPAccuracy.cpp ---- FP Accuracy Support ---------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
/// \file | ||
/// This file contains the implementations of functions that map standard | ||
/// accuracy levels to required accuracy expressed in terms of ULPs. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/IR/FPAccuracy.h" | ||
#include "llvm/IR/Instruction.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/Intrinsics.h" | ||
|
||
namespace llvm { | ||
|
||
/// Returns true if \p IID matches one of the case labels generated from the | ||
/// OPERATION entries of llvm/IR/FPBuiltinOps.def, and false otherwise. | ||
/// | ||
/// NOTE(review): within this file the only visible caller is an assert in | ||
/// fp::getAccuracyForFPBuiltin; confirm this does not produce an | ||
/// unused-function warning in builds where assert() compiles to nothing. | ||
static bool isFPBuiltinIntrinsic(Intrinsic::ID IID) { | ||
switch (IID) { | ||
// Each OPERATION(NAME, INTRINSIC) entry in the included file expands to a | ||
// 'case Intrinsic::INTRINSIC:' label; all of those labels share the single | ||
// 'return true' that follows the include. | ||
#define OPERATION(NAME, INTRINSIC) case Intrinsic::INTRINSIC: | ||
#include "llvm/IR/FPBuiltinOps.def" | ||
return true; | ||
default: | ||
return false; | ||
} | ||
} | ||
|
||
/// Returns the SYCL single-precision accuracy string recorded for \p IID in | ||
/// llvm/IR/FPAccuracy.def (the second FP_ACCURACY argument), or an empty | ||
/// StringRef if \p IID has no entry in that table. | ||
static StringRef lookupSyclFloatAccuracy(Intrinsic::ID IID) { | ||
switch (IID) { | ||
// Expand every FP_ACCURACY table row into a case that returns only the | ||
// SYCL float column; the remaining columns are ignored. FPAccuracy.def | ||
// #undefs FP_ACCURACY at its end, so the macro can be redefined per lookup. | ||
#define FP_ACCURACY(INTRINSIC, SYCL_FLOAT_ACCURACY, SDA, CFA, CDA) \ | ||
case Intrinsic::INTRINSIC: \ | ||
return SYCL_FLOAT_ACCURACY; | ||
#include "llvm/IR/FPAccuracy.def" | ||
default: | ||
return StringRef(); | ||
} | ||
} | ||
|
||
/// Returns the SYCL double-precision accuracy string recorded for \p IID in | ||
/// llvm/IR/FPAccuracy.def (the third FP_ACCURACY argument), or an empty | ||
/// StringRef if \p IID has no entry in that table. | ||
static StringRef lookupSyclDoubleAccuracy(Intrinsic::ID IID) { | ||
switch (IID) { | ||
// Expand every FP_ACCURACY table row into a case that returns only the | ||
// SYCL double column; the remaining columns are ignored. | ||
#define FP_ACCURACY(INTRINSIC, SFA, SYCL_DOUBLE_ACCURACY, CFA, CDA) \ | ||
case Intrinsic::INTRINSIC: \ | ||
return SYCL_DOUBLE_ACCURACY; | ||
#include "llvm/IR/FPAccuracy.def" | ||
default: | ||
return StringRef(); | ||
} | ||
} | ||
|
||
/// Returns the CUDA single-precision accuracy string recorded for \p IID in | ||
/// llvm/IR/FPAccuracy.def (the fourth FP_ACCURACY argument), or an empty | ||
/// StringRef if \p IID has no entry in that table. | ||
static StringRef lookupCudaFloatAccuracy(Intrinsic::ID IID) { | ||
switch (IID) { | ||
// Expand every FP_ACCURACY table row into a case that returns only the | ||
// CUDA float column; the remaining columns are ignored. | ||
#define FP_ACCURACY(INTRINSIC, SFA, SDA, CUDA_FLOAT_ACCURACY, CDA) \ | ||
case Intrinsic::INTRINSIC: \ | ||
return CUDA_FLOAT_ACCURACY; | ||
#include "llvm/IR/FPAccuracy.def" | ||
default: | ||
return StringRef(); | ||
} | ||
} | ||
|
||
/// Returns the CUDA double-precision accuracy string recorded for \p IID in | ||
/// llvm/IR/FPAccuracy.def (the fifth FP_ACCURACY argument), or an empty | ||
/// StringRef if \p IID has no entry in that table. | ||
static StringRef lookupCudaDoubleAccuracy(Intrinsic::ID IID) { | ||
switch (IID) { | ||
// Expand every FP_ACCURACY table row into a case that returns only the | ||
// CUDA double column; the remaining columns are ignored. | ||
#define FP_ACCURACY(INTRINSIC, SFA, SDA, CFA, CUDA_DOUBLE_ACCURACY) \ | ||
case Intrinsic::INTRINSIC: \ | ||
return CUDA_DOUBLE_ACCURACY; | ||
#include "llvm/IR/FPAccuracy.def" | ||
default: | ||
return StringRef(); | ||
} | ||
} | ||
|
||
/// Maps an fpbuiltin intrinsic, an operand type, and an accuracy level to the | ||
/// required accuracy, returned as a string holding a floating-point literal. | ||
/// | ||
/// \param IID  Intrinsic ID; asserted to be an fpbuiltin intrinsic. | ||
/// \param Ty  Floating-point type or vector of floating-point type. A vector | ||
///            is reduced to its element type, which is then asserted to be | ||
///            float or double. | ||
/// \param AccuracyLevel  High and Medium yield fixed values; Low yields a | ||
///            value that depends only on the type; SYCL and CUDA are looked | ||
///            up per intrinsic in llvm/IR/FPAccuracy.def. | ||
/// \returns the accuracy string, or an empty StringRef when the SYCL or CUDA | ||
///          level is requested and \p IID has no entry in FPAccuracy.def. | ||
StringRef fp::getAccuracyForFPBuiltin(Intrinsic::ID IID, const Type *Ty, | ||
fp::FPAccuracy AccuracyLevel) { | ||
assert(isFPBuiltinIntrinsic(IID) && "Invalid intrinsic ID for FPAccuracy"); | ||
|
||
assert(Ty->isFPOrFPVectorTy() && "Invalid type for FPAccuracy"); | ||
|
||
// Vector fpbuiltins have the same accuracy requirements as the corresponding | ||
// scalar operation. | ||
if (const auto *VecTy = dyn_cast<VectorType>(Ty)) | ||
Ty = VecTy->getElementType(); | ||
|
||
// Only float and double element types are handled for now. | ||
// This will probably change at some point. | ||
assert((Ty->isFloatTy() || Ty->isDoubleTy()) && | ||
"Invalid type for FPAccuracy"); | ||
|
||
// High and medium accuracy have the same requirement for all functions | ||
if (AccuracyLevel == fp::FPAccuracy::High) | ||
return "1.0f"; | ||
if (AccuracyLevel == fp::FPAccuracy::Medium) | ||
return "4.0f"; | ||
|
||
// Low accuracy is computed in terms of accurate bits, so it depends on the | ||
// type | ||
if (AccuracyLevel == fp::FPAccuracy::Low) { | ||
// 8192 == 2^13 and 67108864 == 2^26. | ||
if (Ty->isFloatTy()) | ||
return "8192.0f"; | ||
if (Ty->isDoubleTy()) | ||
return "67108864.0f"; // 2^(53-26-1) == 26-bits of accuracy | ||
|
||
// Other types are not supported | ||
llvm_unreachable("Unexpected type for FPAccuracy"); | ||
} | ||
|
||
// High, Medium, and Low have all returned above, so only the table-driven | ||
// levels may remain. | ||
assert((AccuracyLevel == fp::FPAccuracy::SYCL || | ||
AccuracyLevel == fp::FPAccuracy::CUDA) && | ||
"Unexpected FPAccuracy level"); | ||
|
||
// Dispatch on (type, level) to the matching FPAccuracy.def column lookup. | ||
if (Ty->isFloatTy()) { | ||
if (AccuracyLevel == fp::FPAccuracy::SYCL) | ||
return lookupSyclFloatAccuracy(IID); | ||
if (AccuracyLevel == fp::FPAccuracy::CUDA) | ||
return lookupCudaFloatAccuracy(IID); | ||
llvm_unreachable("Unexpected FPAccuracy level"); | ||
} else if (Ty->isDoubleTy()) { | ||
if (AccuracyLevel == fp::FPAccuracy::SYCL) | ||
return lookupSyclDoubleAccuracy(IID); | ||
if (AccuracyLevel == fp::FPAccuracy::CUDA) | ||
return lookupCudaDoubleAccuracy(IID); | ||
llvm_unreachable("Unexpected FPAccuracy level"); | ||
} else { | ||
// This is here for error detection if the logic above is changed. | ||
llvm_unreachable("Unexpected type for FPAccuracy"); | ||
} | ||
} | ||
|
||
} // namespace llvm |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there documentation for how these values are selected? Maybe a link here would be handy?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For high, medium, and low, the accuracy requirements are copied from the icc compiler's -fimf-precision=[high|medium|low] option (https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2023-0/fimf-precision-qimf-precision.html). To those who have never used icc, these values are essentially arbitrary, but we've found them to be good choices for users.
I don't intend for this to be permanently bound to the icc meaning, so I don't think this is worth mentioning in the comments.