intel · bader · May 30, 2023 · Apr 19, 2023 · Apr 22, 2023 · Apr 22, 2023
@@ -0,0 +1,65 @@
+//===--- llvm/IR/FPAccuracy.def - Mappings for fp accuracy  -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines properties of floating point builtin intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+// If the includer did not define FP_ACCURACY, fall back to a definition that
+// expands every entry below to nothing. The parameters are, in order: the
+// intrinsic ID, the SYCL float and double accuracies, and the CUDA float and
+// double accuracies.
+#ifndef FP_ACCURACY
+#define FP_ACCURACY(IID,SF,SD,CF,CD)
+#endif
+
+// Each entry below maps an fpbuiltin intrinsic ID to the required accuracy
+// (in ULPs) for that operation, for single- and double-precision, under the
+// SYCL and CUDA accuracy levels.
+//
+// All accuracies are given as strings that spell single-precision
+// floating-point literals.
+//
+// Note: for single-precision fdiv and sqrt, the value returned here assumes
+// that options to require correctly rounded results
+// (-cl-fp32-correctly-rounded-divide-sqrt for SYCL, -prec-div=true or
+// -prec-sqrt=true for CUDA) are not used. If such options are used, these
+// operations require special handling elsewhere.
+//
+// FP_ACCURACY(<IID>, <SYCL_FLOAT>, <SYCL_DOUBLE>, <CUDA_FLOAT>, <CUDA_DOUBLE>)
+//
+// Columns: intrinsic ID, SYCL float, SYCL double, CUDA float, CUDA double.
+// Every accuracy is a string literal; the lookup helpers in FPAccuracy.cpp
+// return the selected column directly as a StringRef.
+FP_ACCURACY(fpbuiltin_fadd,   "0.0f",  "0.0f",  "0.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_fsub,   "0.0f",  "0.0f",  "0.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_fmul,   "0.0f",  "0.0f",  "0.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_fdiv,   "2.5f",  "0.0f",  "2.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_frem,   "0.0f",  "0.0f",  "0.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_sin,    "4.0f",  "4.0f",  "2.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_cos,    "4.0f",  "4.0f",  "2.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_tan,    "5.0f",  "5.0f",  "4.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_sinh,   "4.0f",  "4.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_cosh,   "4.0f",  "4.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_tanh,   "5.0f",  "5.0f",  "2.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_asin,   "4.0f",  "4.0f",  "4.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_acos,   "4.0f",  "4.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_atan,   "5.0f",  "5.0f",  "2.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_atan2,  "6.0f",  "6.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_asinh,  "4.0f",  "4.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_acosh,  "4.0f",  "4.0f",  "4.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_atanh,  "5.0f",  "5.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_exp,    "3.0f",  "3.0f",  "2.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_exp2,   "3.0f",  "3.0f",  "2.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_exp10,  "3.0f",  "3.0f",  "2.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_expm1,  "3.0f",  "3.0f",  "1.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_log,    "3.0f",  "3.0f",  "1.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_log2,   "3.0f",  "3.0f",  "1.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_log10,  "3.0f",  "3.0f",  "2.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_log1p,  "2.0f",  "2.0f",  "1.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_hypot,  "4.0f",  "4.0f",  "3.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_pow,   "16.0f", "16.0f",  "8.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_ldexp,  "0.0f",  "0.0f",  "0.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_sqrt,   "2.5f",  "0.0f",  "2.0f",  "0.0f")
+FP_ACCURACY(fpbuiltin_rsqrt,  "2.0f",  "2.0f",  "2.0f",  "1.0f")
+FP_ACCURACY(fpbuiltin_erf,   "16.0f", "16.0f",  "2.0f",  "2.0f")
+FP_ACCURACY(fpbuiltin_erfc,  "16.0f", "16.0f",  "4.0f",  "5.0f")
+FP_ACCURACY(fpbuiltin_sincos, "4.0f",  "4.0f",  "2.0f",  "2.0f")
+
+// The macro is removed here, so each includer must (re)define FP_ACCURACY
+// immediately before every #include of this file.
+#undef FP_ACCURACY
@@ -0,0 +1,53 @@
+//===- llvm/IR/FPAccuracy.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Interfaces related to floating-point accuracy control.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FPACCURACY_H
+#define LLVM_IR_FPACCURACY_H
+
+namespace llvm {
+
+class StringRef;
+class Type;
+
+namespace Intrinsic {
+typedef unsigned ID;
+}
+
+namespace fp {
+
+/// FP accuracy
+///
+/// Enumerates supported accuracy modes for fpbuiltin intrinsics. These
+/// modes are used to look up required accuracy in terms of ULP for known
+/// math operations that are represented by the fpbuiltin intrinsics.
+///
+/// As resolved by getAccuracyForFPBuiltin():
+///  - High and Medium each map to one fixed value ("1.0f" and "4.0f"
+///    respectively) for every operation and type.
+///  - Low maps to a fixed value per type ("8192.0f" for float,
+///    "67108864.0f" for double).
+///  - SYCL and CUDA are looked up per operation and per type in
+///    FPAccuracy.def.
+///
+/// These values can also be used to set the default accuracy for an IRBuilder
+/// object and the IRBuilder will automatically attach the corresponding
+/// "fpbuiltin-max-error" attribute to any fpbuiltin intrinsics that are
+/// created using the IRBuilder object.
+///
+enum class FPAccuracy { High, Medium, Low, SYCL, CUDA };
+
+/// Returns the required accuracy, in terms of ULP, for an fpbuiltin intrinsic
+/// given the intrinsic ID, the base type for the operation, and the required
+/// accuracy level (as an enumerated identifier).
+///
+/// The result is the textual form of a single-precision literal (for example
+/// "4.0f"). The intrinsic ID must identify an fpbuiltin intrinsic and the type
+/// must be float or double, or a vector of either; these preconditions are
+/// checked with assertions only. For the SYCL and CUDA levels, an empty
+/// StringRef is returned if FPAccuracy.def has no entry for the intrinsic.
+StringRef getAccuracyForFPBuiltin(Intrinsic::ID, const Type *, FPAccuracy);
+
+} // namespace fp
+
+} // namespace llvm
+
+#endif // LLVM_IR_FPACCURACY_H
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMCore
   DiagnosticPrinter.cpp
   Dominators.cpp
   EHPersonalities.cpp
+  FPAccuracy.cpp
   FPEnv.cpp
   Function.cpp
   GCStrategy.cpp

@@ -0,0 +1,131 @@
+//===-- FPAccuracy.cpp ---- FP Accuracy Support ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the implementations of functions that map standard
+/// accuracy levels to required accuracy expressed in terms of ULPs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/FPAccuracy.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+
+namespace llvm {
+
+#ifndef NDEBUG
+/// Returns true if \p IID matches one of the intrinsics listed through the
+/// OPERATION macro in FPBuiltinOps.def, and false otherwise.
+///
+/// This helper is referenced only from an assertion, so it is compiled out
+/// together with assertions; otherwise release builds would report it as an
+/// unused static function.
+static bool isFPBuiltinIntrinsic(Intrinsic::ID IID) {
+  switch (IID) {
+  // Every OPERATION entry contributes one case label; all of the labels share
+  // the single `return true` that follows the include.
+#define OPERATION(NAME, INTRINSIC) case Intrinsic::INTRINSIC:
+#include "llvm/IR/FPBuiltinOps.def"
+    return true;
+  default:
+    return false;
+  }
+}
+#endif // NDEBUG
+
+/// Finds the SYCL single-precision accuracy string recorded for \p IID in
+/// FPAccuracy.def. Yields an empty StringRef when \p IID has no entry.
+static StringRef lookupSyclFloatAccuracy(Intrinsic::ID IID) {
+  // Each table row becomes one case label that returns its SYCL float column.
+#define FP_ACCURACY(Op, SyclF32, SyclF64, CudaF32, CudaF64)                    \
+  case Intrinsic::Op:                                                          \
+    return SyclF32;
+
+  switch (IID) {
+#include "llvm/IR/FPAccuracy.def"
+  default:
+    break;
+  }
+  return StringRef();
+}
+
+/// Finds the SYCL double-precision accuracy string recorded for \p IID in
+/// FPAccuracy.def. Yields an empty StringRef when \p IID has no entry.
+static StringRef lookupSyclDoubleAccuracy(Intrinsic::ID IID) {
+  // Each table row becomes one case label that returns its SYCL double column.
+#define FP_ACCURACY(Op, SyclF32, SyclF64, CudaF32, CudaF64)                    \
+  case Intrinsic::Op:                                                          \
+    return SyclF64;
+
+  switch (IID) {
+#include "llvm/IR/FPAccuracy.def"
+  default:
+    break;
+  }
+  return StringRef();
+}
+
+/// Finds the CUDA single-precision accuracy string recorded for \p IID in
+/// FPAccuracy.def. Yields an empty StringRef when \p IID has no entry.
+static StringRef lookupCudaFloatAccuracy(Intrinsic::ID IID) {
+  // Each table row becomes one case label that returns its CUDA float column.
+#define FP_ACCURACY(Op, SyclF32, SyclF64, CudaF32, CudaF64)                    \
+  case Intrinsic::Op:                                                          \
+    return CudaF32;
+
+  switch (IID) {
+#include "llvm/IR/FPAccuracy.def"
+  default:
+    break;
+  }
+  return StringRef();
+}
+
+/// Finds the CUDA double-precision accuracy string recorded for \p IID in
+/// FPAccuracy.def. Yields an empty StringRef when \p IID has no entry.
+static StringRef lookupCudaDoubleAccuracy(Intrinsic::ID IID) {
+  // Each table row becomes one case label that returns its CUDA double column.
+#define FP_ACCURACY(Op, SyclF32, SyclF64, CudaF32, CudaF64)                    \
+  case Intrinsic::Op:                                                          \
+    return CudaF64;
+
+  switch (IID) {
+#include "llvm/IR/FPAccuracy.def"
+  default:
+    break;
+  }
+  return StringRef();
+}
+
+/// Maps an fpbuiltin intrinsic, its operand type and an accuracy level to the
+/// maximum permitted error, spelled as a single-precision literal string.
+///
+/// \p IID must be an fpbuiltin intrinsic and \p Ty must be float, double, or a
+/// vector of one of those; these requirements are enforced by assertions only.
+/// For the SYCL and CUDA levels the result comes from FPAccuracy.def and is an
+/// empty StringRef when the table has no entry for \p IID.
+StringRef fp::getAccuracyForFPBuiltin(Intrinsic::ID IID, const Type *Ty,
+                                      fp::FPAccuracy AccuracyLevel) {
+  assert(isFPBuiltinIntrinsic(IID) && "Invalid intrinsic ID for FPAccuracy");
+
+  assert(Ty->isFPOrFPVectorTy() && "Invalid type for FPAccuracy");
+
+  // A vector operation is held to the same requirement as its scalar
+  // counterpart, so only the element type matters from here on.
+  const Type *ScalarTy = Ty->getScalarType();
+  const bool IsFloat = ScalarTy->isFloatTy();
+  const bool IsDouble = ScalarTy->isDoubleTy();
+
+  // Only float and double are handled for now.
+  assert((IsFloat || IsDouble) && "Invalid type for FPAccuracy");
+
+  switch (AccuracyLevel) {
+  case fp::FPAccuracy::High:
+    // High and medium requirements do not depend on the function or type.
+    return "1.0f";
+  case fp::FPAccuracy::Medium:
+    return "4.0f";
+  case fp::FPAccuracy::Low:
+    // Low accuracy is expressed as a number of accurate bits, which is why
+    // the ULP value differs per type.
+    if (IsFloat)
+      return "8192.0f";
+    if (IsDouble)
+      return "67108864.0f"; // 2^(53-26-1) == 26-bits of accuracy
+    llvm_unreachable("Unexpected type for FPAccuracy");
+  case fp::FPAccuracy::SYCL:
+  case fp::FPAccuracy::CUDA:
+    // Table-driven levels are resolved below.
+    break;
+  }
+
+  const bool IsSycl = AccuracyLevel == fp::FPAccuracy::SYCL;
+  const bool IsCuda = AccuracyLevel == fp::FPAccuracy::CUDA;
+  assert((IsSycl || IsCuda) && "Unexpected FPAccuracy level");
+
+  if (IsFloat) {
+    if (IsSycl)
+      return lookupSyclFloatAccuracy(IID);
+    if (IsCuda)
+      return lookupCudaFloatAccuracy(IID);
+    llvm_unreachable("Unexpected FPAccuracy level");
+  }
+
+  if (IsDouble) {
+    if (IsSycl)
+      return lookupSyclDoubleAccuracy(IID);
+    if (IsCuda)
+      return lookupCudaDoubleAccuracy(IID);
+    llvm_unreachable("Unexpected FPAccuracy level");
+  }
+
+  // Kept as a tripwire in case the type checks above are ever relaxed.
+  llvm_unreachable("Unexpected type for FPAccuracy");
+}
+
+} // namespace llvm
@@ -23,6 +23,7 @@ add_llvm_unittest(IRTests
   DemandedBitsTest.cpp
   DominatorTreeTest.cpp
   DominatorTreeBatchUpdatesTest.cpp
+  FPAccuracyTest.cpp
   FunctionTest.cpp
   PassBuilderCallbacksTest.cpp
   IRBuilderTest.cpp