intel · pvchupin · Nov 28, 2022 · Aug 3, 2022 · Aug 3, 2022 · Aug 24, 2022
@@ -181,7 +181,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
-  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
+  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || Opts.SYCLIsDevice) {
     // Set __CUDA_ARCH__ for the GPU specified.
     std::string CUDAArchCode = [this] {
       switch (GPU) {

@@ -5081,6 +5081,62 @@ class OffloadingActionBuilder final {
       }
     }
 
+    // Return whether to use the native bfloat16 device library instead of the
+    // software fallback one.
+    //
+    // TC    - offload toolchain the device libraries are being selected for.
+    // isAOT - [out] set to true when TC targets SPIR and at least one
+    //         -fsycl-targets= argument is present; false otherwise.
+    //
+    // Returns true only when TC has the SPIR "gen" sub-arch, both a
+    // -fsycl-targets= value and a backend device option naming a triple from
+    // SYCLTripleList were seen, the target value contains neither '*' nor
+    // ',', and the device option contains "pvc" or "ats".
+    bool useNativeBfloat(const ToolChain *TC, bool &isAOT) {
+      isAOT = false;
+      if (!TC->getTriple().isSPIR())
+        return false;
+
+      // True when the given triple string names one of the SYCL triples.
+      auto IsSYCLTriple = [this](llvm::StringRef Triple) {
+        llvm::Triple TargetTriple{Triple};
+        return llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) {
+                 return SYCLTriple == TargetTriple;
+               }) != SYCLTripleList.end();
+      };
+
+      // For each kind of option the last matching argument wins.
+      const char *TargetOpt = nullptr;
+      const char *DeviceOpt = nullptr;
+      for (auto *A : Args) {
+        if (A->getOption().matches(options::OPT_fsycl_targets_EQ)) {
+          // Passing arg: -fsycl-targets=<targets>.
+          isAOT = true;
+          if (IsSYCLTriple(A->getValue(0)))
+            TargetOpt = A->getValue(0);
+        } else if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) {
+          // Passing device args: -Xsycl-target-backend=<triple> -opt=val.
+          if (IsSYCLTriple(A->getValue(0)))
+            DeviceOpt = A->getValue(1);
+        } else if (A->getOption().matches(options::OPT_Xsycl_backend)) {
+          // Passing device args: -Xsycl-target-backend -opt=val.
+          DeviceOpt = A->getValue(0);
+        }
+      }
+
+      // isAOT has to be computed for every SPIR toolchain, so the sub-arch
+      // check is deliberately done only after the argument scan.
+      if (TC->getTriple().getSubArch() != llvm::Triple::SPIRSubArch_gen)
+        return false;
+
+      if (!TargetOpt || !DeviceOpt)
+        return false;
+
+      // Currently we support only single AOT target for bfloat16.
+      // NOTE(review): if -fsycl-targets= splits its comma-separated list into
+      // separate values, getValue(0) holds only the first entry and the ','
+      // test below can never fire - confirm.
+      if (strstr(TargetOpt, "*") || strstr(TargetOpt, ","))
+        return false;
+      return strstr(DeviceOpt, "pvc") || strstr(DeviceOpt, "ats");
+    }
+
     bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects,
                            bool isSpirvAOT, bool isMSVCEnv) {
       struct DeviceLibOptInfo {
@@ -5094,7 +5150,8 @@ class OffloadingActionBuilder final {
       // of "internal" libraries cannot be affected via -fno-sycl-device-lib.
       llvm::StringMap<bool> devicelib_link_info = {
           {"libc", true},        {"libm-fp32", true},   {"libm-fp64", true},
-          {"libimf-fp32", true}, {"libimf-fp64", true}, {"internal", true}};
+          {"libimf-fp32", true}, {"libimf-fp64", true}, {"libm-bfloat16", true},
+          {"internal", true}};
       if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ,
                                    options::OPT_fno_sycl_device_lib_EQ)) {
         if (A->getValues().size() == 0)
@@ -5153,6 +5210,10 @@ class OffloadingActionBuilder final {
           {"libsycl-fallback-cmath-fp64", "libm-fp64"},
           {"libsycl-fallback-imf", "libimf-fp32"},
           {"libsycl-fallback-imf-fp64", "libimf-fp64"}};
+      const SYCLDeviceLibsList sycl_device_bfloat16_fallback_lib = {
+          {"libsycl-fallback-bfloat16", "libm-bfloat16"}};
+      const SYCLDeviceLibsList sycl_device_bfloat16_native_lib = {
+          {"libsycl-native-bfloat16", "libm-bfloat16"}};
       // ITT annotation libraries are linked in separately whenever the device
       // code instrumentation is enabled.
       const SYCLDeviceLibsList sycl_device_annotation_libs = {
@@ -5202,6 +5263,18 @@ class OffloadingActionBuilder final {
       addInputs(sycl_device_wrapper_libs);
       if (isSpirvAOT || TC->getTriple().isNVPTX())
         addInputs(sycl_device_fallback_libs);
+
+      bool isAOT;
+      bool useNativeBfloatLib = useNativeBfloat(TC, isAOT);
+      if (isAOT &&
+          TC->getTriple().getSubArch() != llvm::Triple::SPIRSubArch_fpga) {
+        // Add native or fallback bfloat16 library.
+        if (useNativeBfloatLib)
+          addInputs(sycl_device_bfloat16_native_lib);
+        else
+          addInputs(sycl_device_bfloat16_fallback_lib);
+      }
+
       if (Args.hasFlag(options::OPT_fsycl_instrument_device_code,
                        options::OPT_fno_sycl_instrument_device_code, true))
         addInputs(sycl_device_annotation_libs);

@@ -0,0 +1,26 @@
+//==--- bfloat16_wrapper.cpp - wrappers for bfloat16 library functions ----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#include "device.h"
+
+#ifdef __SPIR__
+
+#include <CL/__spirv/spirv_ops.hpp>
+#include <cstdint>
+
+// Device-library entry point taking a float and returning 16 bits of
+// bfloat16 data; it only forwards to __spirv_ConvertFToBF16INTEL.
+DEVICE_EXTERN_C_INLINE
+uint16_t __devicelib_ConvertFToBF16INTEL(const float &x) {
+  const uint16_t Converted = __spirv_ConvertFToBF16INTEL(x);
+  return Converted;
+}
+
+// Device-library entry point taking 16 bits of bfloat16 data and returning a
+// float; it only forwards to __spirv_ConvertBF16ToFINTEL.
+DEVICE_EXTERN_C_INLINE
+float __devicelib_ConvertBF16ToFINTEL(const uint16_t &x) {
+  const float Converted = __spirv_ConvertBF16ToFINTEL(x);
+  return Converted;
+}
+
+#endif // __SPIR__
@@ -101,6 +101,7 @@ set(complex_obj_deps device_complex.h device.h sycl-compiler)
 set(cmath_obj_deps device_math.h device.h sycl-compiler)
 set(imf_obj_deps device_imf.hpp imf_half.hpp device.h sycl-compiler)
 set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler)
+# Both bfloat16 sources include device.h, so list it as a dependency like the
+# other *_obj_deps lists do.
+set(bfloat16_obj_deps device.h sycl-compiler)
 
 add_devicelib_obj(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps})
 add_devicelib_obj(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps})
@@ -113,6 +114,7 @@ add_devicelib_obj(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps})
 add_devicelib_obj(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} )
 add_devicelib_obj(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps})
 add_devicelib_obj(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps})
+# NOTE(review): this target depends on cmath_obj_deps rather than the
+# bfloat16_obj_deps list defined above - confirm this is intended.
+add_devicelib_obj(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} )
 if(WIN32)
 add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps})
 endif()
@@ -123,6 +125,8 @@ add_fallback_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${c
 add_fallback_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} )
 add_fallback_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps})
 add_fallback_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps})
+# bfloat16 conversion libraries: the software fallback and the native variant.
+# The native variant is built from bfloat16_wrapper.cpp, the same source file
+# that libsycl-bfloat16 uses.
+add_fallback_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps})
+add_fallback_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps})
 
 file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice)
 set(imf_fallback_src_dir ${obj_binary_dir}/libdevice)

@@ -0,0 +1,46 @@
+//==------- fallback-bfloat16.cpp - bfloat16 conversions in software -------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#include "device.h"
+
+#ifdef __SPIR__
+
+#include <cstdint>
+
+// To support fallback device libraries on-demand loading, please update the
+// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
+// or remove any item in this file.
+// TODO: generate the DeviceLibFuncMap in sycl-post-link.cpp automatically
+// during the build based on libdevice to avoid manual synchronization.
+
+// Software float -> bfloat16 conversion: keeps the upper 16 bits of the
+// float's 32-bit representation after rounding to nearest, ties to even.
+DEVICE_EXTERN_C_INLINE uint16_t
+__devicelib_ConvertFToBF16INTEL(const float &a) {
+  // NaN input is mapped to one fixed bfloat16 quiet-NaN bit pattern.
+  if (__spirv_IsNan(a))
+    return 0xffc1;
+  union {
+    uint32_t Bits;
+    float Value;
+  };
+  Value = a;
+  // Round to nearest even: add 0x7FFF plus the lowest bit that survives the
+  // truncation (bit 16), so an exact tie rounds up only when that bit is 1.
+  const uint32_t KeptLsb = (Bits >> 16) & 0x1;
+  const uint32_t Rounded = Bits + KeptLsb + 0x00007FFF;
+  return static_cast<uint16_t>(Rounded >> 16);
+}
+
+// Software bfloat16 -> float conversion: the 16 input bits become the upper
+// half of the 32-bit float representation and the lower half is zero.
+DEVICE_EXTERN_C_INLINE float
+__devicelib_ConvertBF16ToFINTEL(const uint16_t &a) {
+  union {
+    uint32_t intStorage;
+    float floatValue;
+  };
+  // Widen before shifting: `a << 16` would be evaluated in signed int after
+  // integral promotion and would shift a set top bit into the sign bit.
+  intStorage = static_cast<uint32_t>(a) << 16;
+  return floatValue;
+}
+
+#endif // __SPIR__
@@ -415,6 +415,10 @@ SYCLDeviceLibFuncMap SDLMap = {
      DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_longlong_as_double",
      DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_ConvertFToBF16INTEL",
+     DeviceLibExt::cl_intel_devicelib_bfloat16},
+    {"__devicelib_ConvertBF16ToFINTEL",
+     DeviceLibExt::cl_intel_devicelib_bfloat16},
 };
 
 // Each fallback device library corresponds to one bit in "require mask" which
@@ -429,6 +433,7 @@ SYCLDeviceLibFuncMap SDLMap = {
 // fallback-cstring:      0x20
 // fallback-imf:          0x40
 // fallback-imf-fp64:     0x80
+// fallback-bfloat16:     0x100
 uint32_t getDeviceLibBits(const std::string &FuncName) {
   auto DeviceLibFuncIter = SDLMap.find(FuncName);
   return ((DeviceLibFuncIter == SDLMap.end())

@@ -34,6 +34,7 @@ enum class DeviceLibExt : std::uint32_t {
   cl_intel_devicelib_cstring,
   cl_intel_devicelib_imf,
   cl_intel_devicelib_imf_fp64,
+  cl_intel_devicelib_bfloat16,
 };
 
 uint32_t getSYCLDeviceLibReqMask(const Module &M);