Skip to content

Add support for fpbuiltin accuracy lookup #9167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions llvm/include/llvm/IR/FPAccuracy.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//===--- llvm/IR/FPAccuracy.def - Mappings for fp accuracy -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines properties of floating point builtin intrinsics.
//
//===----------------------------------------------------------------------===//

// Fallback definition: when the including file has not defined FP_ACCURACY,
// every entry below expands to nothing.
#ifndef FP_ACCURACY
#define FP_ACCURACY(IID,SF,SD,CF,CD)
#endif

// Each entry below maps an fpbuiltin intrinsic ID to the required accuracy
// for that operation for single- and double-precision for SYCL and CUDA.
//
// All accuracies are written as string literals spelling single-precision
// floating-point values (for example "4.0f").
//
// Note: for single-precision fdiv and sqrt, the value returned here assumes
// that options to require correctly rounded results
// (-cl-fp32-correctly-rounded-divide-sqrt for SYCL, -prec-div=true or
// -prec-sqrt=true for CUDA) are not used. If such options are used, these
// operations require special handling elsewhere.
//
// FP_ACCURACY(<IID>, <SYCL_FLOAT>, <SYCL_DOUBLE>, <CUDA_FLOAT>, <CUDA_DOUBLE>)
//
FP_ACCURACY(fpbuiltin_fadd, "0.0f", "0.0f", "0.0f", "0.0f")
FP_ACCURACY(fpbuiltin_fsub, "0.0f", "0.0f", "0.0f", "0.0f")
FP_ACCURACY(fpbuiltin_fmul, "0.0f", "0.0f", "0.0f", "0.0f")
FP_ACCURACY(fpbuiltin_fdiv, "2.5f", "0.0f", "2.0f", "0.0f")
FP_ACCURACY(fpbuiltin_frem, "0.0f", "0.0f", "0.0f", "0.0f")
FP_ACCURACY(fpbuiltin_sin, "4.0f", "4.0f", "2.0f", "2.0f")
FP_ACCURACY(fpbuiltin_cos, "4.0f", "4.0f", "2.0f", "2.0f")
FP_ACCURACY(fpbuiltin_tan, "5.0f", "5.0f", "4.0f", "2.0f")
FP_ACCURACY(fpbuiltin_sinh, "4.0f", "4.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_cosh, "4.0f", "4.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_tanh, "5.0f", "5.0f", "2.0f", "2.0f")
FP_ACCURACY(fpbuiltin_asin, "4.0f", "4.0f", "4.0f", "2.0f")
FP_ACCURACY(fpbuiltin_acos, "4.0f", "4.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_atan, "5.0f", "5.0f", "2.0f", "2.0f")
FP_ACCURACY(fpbuiltin_atan2, "6.0f", "6.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_asinh, "4.0f", "4.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_acosh, "4.0f", "4.0f", "4.0f", "2.0f")
FP_ACCURACY(fpbuiltin_atanh, "5.0f", "5.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_exp, "3.0f", "3.0f", "2.0f", "1.0f")
FP_ACCURACY(fpbuiltin_exp2, "3.0f", "3.0f", "2.0f", "1.0f")
FP_ACCURACY(fpbuiltin_exp10, "3.0f", "3.0f", "2.0f", "1.0f")
FP_ACCURACY(fpbuiltin_expm1, "3.0f", "3.0f", "1.0f", "1.0f")
FP_ACCURACY(fpbuiltin_log, "3.0f", "3.0f", "1.0f", "1.0f")
FP_ACCURACY(fpbuiltin_log2, "3.0f", "3.0f", "1.0f", "1.0f")
FP_ACCURACY(fpbuiltin_log10, "3.0f", "3.0f", "2.0f", "1.0f")
FP_ACCURACY(fpbuiltin_log1p, "2.0f", "2.0f", "1.0f", "1.0f")
FP_ACCURACY(fpbuiltin_hypot, "4.0f", "4.0f", "3.0f", "2.0f")
FP_ACCURACY(fpbuiltin_pow, "16.0f", "16.0f", "8.0f", "2.0f")
FP_ACCURACY(fpbuiltin_ldexp, "0.0f", "0.0f", "0.0f", "0.0f")
FP_ACCURACY(fpbuiltin_sqrt, "2.5f", "0.0f", "2.0f", "0.0f")
FP_ACCURACY(fpbuiltin_rsqrt, "2.0f", "2.0f", "2.0f", "1.0f")
FP_ACCURACY(fpbuiltin_erf, "16.0f", "16.0f", "2.0f", "2.0f")
FP_ACCURACY(fpbuiltin_erfc, "16.0f", "16.0f", "4.0f", "5.0f")
FP_ACCURACY(fpbuiltin_sincos, "4.0f", "4.0f", "2.0f", "2.0f")

// FP_ACCURACY is unconditionally undefined here, so an includer must define
// it again before each inclusion of this file.
#undef FP_ACCURACY
53 changes: 53 additions & 0 deletions llvm/include/llvm/IR/FPAccuracy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===- llvm/IR/FPAccuracy.h -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Interfaces related to floating-point accuracy control.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_IR_FPACCURACY_H
#define LLVM_IR_FPACCURACY_H

namespace llvm {

class StringRef;
class Type;

namespace Intrinsic {
typedef unsigned ID;
}

namespace fp {

/// FP accuracy
///
/// Enumerates supported accuracy modes for fpbuiltin intrinsics. These
/// modes are used to look up required accuracy in terms of ULP for known
/// math operations that are represented by the fpbuiltin intrinsics.
///
/// As implemented by getAccuracyForFPBuiltin:
///   - High and Medium map to one fixed value each, regardless of operation.
///   - Low maps to a value that depends on the scalar element type.
///   - SYCL and CUDA map to per-operation values taken from FPAccuracy.def.
///
/// These values can also be used to set the default accuracy for an IRBuilder
/// object and the IRBuilder will automatically attach the corresponding
/// "fpbuiltin-max-error" attribute to any fpbuiltin intrinsics that are
/// created using the IRBuilder object.
///
enum class FPAccuracy { High, Medium, Low, SYCL, CUDA };

/// Returns the required accuracy, in terms of ULP, for an fpbuiltin intrinsic
/// given the intrinsic ID, the base type for the operation, and the required
/// accuracy level (as an enumerated identifier). The result is the textual
/// spelling of a single-precision value (for example "4.0f").
///
/// If the supplied intrinsic ID and type do not identify an operation for
/// which required accuracy is available, this function will not return a
/// usable value: the SYCL and CUDA table lookups yield an empty StringRef
/// when the intrinsic has no entry in FPAccuracy.def.
StringRef getAccuracyForFPBuiltin(Intrinsic::ID, const Type *, FPAccuracy);

} // namespace fp

} // namespace llvm

#endif // LLVM_IR_FPACCURACY_H
1 change: 1 addition & 0 deletions llvm/lib/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMCore
DiagnosticPrinter.cpp
Dominators.cpp
EHPersonalities.cpp
FPAccuracy.cpp
FPEnv.cpp
Function.cpp
GCStrategy.cpp
Expand Down
131 changes: 131 additions & 0 deletions llvm/lib/IR/FPAccuracy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//===-- FPAccuracy.cpp ---- FP Accuracy Support ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the implementations of functions that map standard
/// accuracy levels to required accuracy expressed in terms of ULPs.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/FPAccuracy.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"

namespace llvm {

/// Reports whether \p IID matches one of the case labels generated from
/// llvm/IR/FPBuiltinOps.def.
static bool isFPBuiltinIntrinsic(Intrinsic::ID IID) {
  // Every OPERATION entry expands to a bare case label, so all generated
  // labels share the single `return true` that follows the include.
#define OPERATION(OpName, IntrinsicName) case Intrinsic::IntrinsicName:
  switch (IID) {
#include "llvm/IR/FPBuiltinOps.def"
    return true;
  default:
    break;
  }
  return false;
}

/// Returns the SYCL single-precision accuracy string listed for \p IID in
/// FPAccuracy.def, or an empty StringRef when the table has no such entry.
static StringRef lookupSyclFloatAccuracy(Intrinsic::ID IID) {
  // Select the second column (SYCL_FLOAT) of each table row.
#define FP_ACCURACY(IntrinsicName, SyclFloat, SyclDouble, CudaFloat,           \
                    CudaDouble)                                                \
  case Intrinsic::IntrinsicName:                                               \
    return SyclFloat;
  switch (IID) {
#include "llvm/IR/FPAccuracy.def"
  default:
    break;
  }
  return StringRef();
}

/// Returns the SYCL double-precision accuracy string listed for \p IID in
/// FPAccuracy.def, or an empty StringRef when the table has no such entry.
static StringRef lookupSyclDoubleAccuracy(Intrinsic::ID IID) {
  // Select the third column (SYCL_DOUBLE) of each table row.
#define FP_ACCURACY(IntrinsicName, SyclFloat, SyclDouble, CudaFloat,           \
                    CudaDouble)                                                \
  case Intrinsic::IntrinsicName:                                               \
    return SyclDouble;
  switch (IID) {
#include "llvm/IR/FPAccuracy.def"
  default:
    break;
  }
  return StringRef();
}

/// Returns the CUDA single-precision accuracy string listed for \p IID in
/// FPAccuracy.def, or an empty StringRef when the table has no such entry.
static StringRef lookupCudaFloatAccuracy(Intrinsic::ID IID) {
  // Select the fourth column (CUDA_FLOAT) of each table row.
#define FP_ACCURACY(IntrinsicName, SyclFloat, SyclDouble, CudaFloat,           \
                    CudaDouble)                                                \
  case Intrinsic::IntrinsicName:                                               \
    return CudaFloat;
  switch (IID) {
#include "llvm/IR/FPAccuracy.def"
  default:
    break;
  }
  return StringRef();
}

/// Returns the CUDA double-precision accuracy string listed for \p IID in
/// FPAccuracy.def, or an empty StringRef when the table has no such entry.
static StringRef lookupCudaDoubleAccuracy(Intrinsic::ID IID) {
  // Select the fifth column (CUDA_DOUBLE) of each table row.
#define FP_ACCURACY(IntrinsicName, SyclFloat, SyclDouble, CudaFloat,           \
                    CudaDouble)                                                \
  case Intrinsic::IntrinsicName:                                               \
    return CudaDouble;
  switch (IID) {
#include "llvm/IR/FPAccuracy.def"
  default:
    break;
  }
  return StringRef();
}

/// Returns the accuracy string for the fpbuiltin intrinsic \p IID operating on
/// \p Ty at the requested \p AccuracyLevel.
///
/// \p Ty may be a scalar or a vector type; vectors are reduced to their element
/// type first. High and Medium yield a fixed string, Low yields a string chosen
/// by element type, and SYCL/CUDA are resolved through the per-target lookup
/// helpers, which return an empty StringRef for intrinsics without a table
/// entry.
StringRef fp::getAccuracyForFPBuiltin(Intrinsic::ID IID, const Type *Ty,
fp::FPAccuracy AccuracyLevel) {
assert(isFPBuiltinIntrinsic(IID) && "Invalid intrinsic ID for FPAccuracy");

assert(Ty->isFPOrFPVectorTy() && "Invalid type for FPAccuracy");

// Vector fpbuiltins have the same accuracy requirements as the corresponding
// scalar operation.
if (const auto *VecTy = dyn_cast<VectorType>(Ty))
Ty = VecTy->getElementType();

// This will probably change at some point.
// Only float and double element types are accepted for now.
assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
"Invalid type for FPAccuracy");

// High and medium accuracy have the same requirement for all functions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there documentation for how these values are selected? May be a link here can be handy?

Copy link
Contributor Author

@andykaylor andykaylor May 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For high, medium, and low, the accuracy requirements are copied from the icc compiler's -fimf-precision=[high|medium|low] option (https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2023-0/fimf-precision-qimf-precision.html). To those who have never used icc, these values are essentially arbitrary, but we've found them to be good choices for users.

I don't intend for this to be permanently bound to the icc meaning, so I don't think this is worth mentioning in the comments.

if (AccuracyLevel == fp::FPAccuracy::High)
return "1.0f";
if (AccuracyLevel == fp::FPAccuracy::Medium)
return "4.0f";

// Low accuracy is computed in terms of accurate bits, so it depends on the
// type
if (AccuracyLevel == fp::FPAccuracy::Low) {
if (Ty->isFloatTy())
// NOTE(review): 8192 is 2^13; the bit-count derivation is only spelled out
// for the double case below -- confirm the intended number of accurate bits.
return "8192.0f";
if (Ty->isDoubleTy())
return "67108864.0f"; // 2^(53-26-1) == 26-bits of accuracy

// Other types are not supported
llvm_unreachable("Unexpected type for FPAccuracy");
}

// Only the table-driven levels remain past this point.
assert((AccuracyLevel == fp::FPAccuracy::SYCL ||
AccuracyLevel == fp::FPAccuracy::CUDA) &&
"Unexpected FPAccuracy level");

// Dispatch on element type first, then on target, to pick the matching
// column of FPAccuracy.def.
if (Ty->isFloatTy()) {
if (AccuracyLevel == fp::FPAccuracy::SYCL)
return lookupSyclFloatAccuracy(IID);
if (AccuracyLevel == fp::FPAccuracy::CUDA)
return lookupCudaFloatAccuracy(IID);
llvm_unreachable("Unexpected FPAccuracy level");
} else if (Ty->isDoubleTy()) {
if (AccuracyLevel == fp::FPAccuracy::SYCL)
return lookupSyclDoubleAccuracy(IID);
if (AccuracyLevel == fp::FPAccuracy::CUDA)
return lookupCudaDoubleAccuracy(IID);
llvm_unreachable("Unexpected FPAccuracy level");
} else {
// This is here for error detection if the logic above is changed.
llvm_unreachable("Unexpected type for FPAccuracy");
}
}

} // namespace llvm
1 change: 1 addition & 0 deletions llvm/unittests/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_llvm_unittest(IRTests
DemandedBitsTest.cpp
DominatorTreeTest.cpp
DominatorTreeBatchUpdatesTest.cpp
FPAccuracyTest.cpp
FunctionTest.cpp
PassBuilderCallbacksTest.cpp
IRBuilderTest.cpp
Expand Down
Loading