Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Tests for conversions between fp32 and bf16 #1364

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 235 additions & 0 deletions SYCL/DeviceLib/imf_bf16_comp_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// UNSUPPORTED: cuda || hip

#include "imf_utils.hpp"
#include <sycl/ext/intel/math.hpp>
using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
namespace sycl_imf = sycl::ext::intel::math;
int main() {
sycl::queue device_queue(sycl::default_selector_v);
std::cout << "Running on "
<< device_queue.get_device().get_info<sycl::info::device::name>()
<< "\n";

// hisnan test
{
std::initializer_list<uint16_t> input_vals = {0x0, 0x1, 0x7A,
0x7F80, 0x7FC0, 0x7FC5};
std::initializer_list<bool> ref_vals = {false, false, false,
false, true, true};
test(device_queue, input_vals, ref_vals,
FT1(sycl_bfloat16, sycl_imf::hisnan));
}

// hisinf
{
std::initializer_list<uint16_t> input_vals = {
0x0, 0x1, 0x7A, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84};
std::initializer_list<bool> ref_vals = {false, false, false, true, false,
false, true, false, false};
test(device_queue, input_vals, ref_vals,
FT1(sycl_bfloat16, sycl_imf::hisinf));
}

// heq
{
std::initializer_list<uint16_t> input_vals1 = {
0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74};
std::initializer_list<uint16_t> input_vals2 = {
0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84};
std::initializer_list<bool> ref_vals = {false, true, false, false, false,
false, true, true, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::heq));
}

// hequ
{
std::initializer_list<uint16_t> input_vals1 = {
0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74, 0x7FC2};
std::initializer_list<uint16_t> input_vals2 = {
0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0xAE44, 0xFF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {false, true, false, true, true,
true, true, true, false, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hequ));
}

// hne
{
std::initializer_list<uint16_t> input_vals1 = {
0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74};
std::initializer_list<uint16_t> input_vals2 = {
0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF84};
std::initializer_list<bool> ref_vals = {true, false, true, false, false,
false, false, false, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hne));
}

// hneu
{
std::initializer_list<uint16_t> input_vals1 = {
0x0, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0xAE44, 0xFF74, 0x7FC2};
std::initializer_list<uint16_t> input_vals2 = {
0x1, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0xAE44, 0xFF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {true, false, true, true, true,
true, false, false, true, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hneu));
}

// hge
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {true, true, false, false, false,
false, true, false, true, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hge));
}

// hgeu
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {true, true, false, true, true,
true, true, false, true, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hgeu));
}

// hgt
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {true, false, false, false, false,
false, false, false, true, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hgt));
}

// hgtu
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {true, false, false, true, true,
true, false, false, true, true};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hgtu));
}

// hle
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {false, true, true, false, false,
false, true, true, false, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hle));
}

// hleu
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {false, true, true, true, true,
true, true, true, false, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hleu));
}

// hlt
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {false, false, true, false, false,
false, false, true, false, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hlt));
}

// hltu
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0x1, 0x7A, 0x7F81, 0x7FC0, 0x7FC5, 0xFF80, 0x5000, 0xAF74, 0x71C2};
std::initializer_list<uint16_t> input_vals2 = {
0x0, 0x1, 0x7C, 0x7F80, 0x7FC0, 0x7FC9, 0xFF80, 0x6000, 0xAF84, 0xAAEC};
std::initializer_list<bool> ref_vals = {false, false, true, true, true,
true, false, true, false, false};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT2(sycl_bfloat16, sycl_imf::hltu));
}

// hmax
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05};
std::initializer_list<uint16_t> input_vals2 = {
0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55};
std::initializer_list<uint16_t> ref_vals = {0x2, 0x4044, 0x0, 0xAAAA,
0x1123, 0x7FC0, 0x7F80, 0x2E55};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT22(sycl_bfloat16, uint16_t, sycl_imf::hmax));
}

// hmax_nan
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05};
std::initializer_list<uint16_t> input_vals2 = {
0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55};
std::initializer_list<uint16_t> ref_vals = {0x2, 0x4044, 0x0, 0x7FC0,
0x7FC0, 0x7FC0, 0x7F80, 0x2E55};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT22(sycl_bfloat16, uint16_t, sycl_imf::hmax_nan));
}

// hmin
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05};
std::initializer_list<uint16_t> input_vals2 = {
0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55};
std::initializer_list<uint16_t> ref_vals = {0x1, 0xAF84, 0x8000, 0xAAAA,
0x1123, 0x7FC0, 0x7EEE, 0x2E05};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT22(sycl_bfloat16, uint16_t, sycl_imf::hmin));
}

// hmin_nan
{
std::initializer_list<uint16_t> input_vals1 = {
0x1, 0xAF84, 0x8000, 0x7FC0, 0x1123, 0x7FCC, 0x7F80, 0x2E05};
std::initializer_list<uint16_t> input_vals2 = {
0x2, 0x4044, 0x0, 0xAAAA, 0x7FC8, 0x7FC8, 0x7EEE, 0x2E55};
std::initializer_list<uint16_t> ref_vals = {0x1, 0xAF84, 0x8000, 0x7FC0,
0x7FC0, 0x7FC0, 0x7EEE, 0x2E05};
test2(device_queue, input_vals1, input_vals2, ref_vals,
FT22(sycl_bfloat16, uint16_t, sycl_imf::hmin_nan));
}
return 0;
}
53 changes: 53 additions & 0 deletions SYCL/DeviceLib/imf_bf16_to_fp32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// UNSUPPORTED: cuda || hip

// All __imf_* bf16 functions are implemented via fp32 emulation, so we don't
// need to check whether underlying device supports bf16 or not.
#include "imf_utils.hpp"
#include <sycl/ext/intel/math.hpp>
using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
namespace sycl_imf = sycl::ext::intel::math;
int main() {
sycl::queue device_queue(sycl::default_selector_v);
std::cout << "Running on "
<< device_queue.get_device().get_info<sycl::info::device::name>()
<< "\n";

{
std::initializer_list<uint16_t> input_vals = {
0x0, // +0
0x8000, // -0
0x1, // min positive subnormal
0x7F, // max positive subnormal
0x5A, // positive subnormal
0x8001, // max negative subnormal
0x807F, // min negative subnormal
0x805A, // negative subnormal
0x7F80, // +inf
0xFF80, // -inf
0x2E05, // positive normal
0x7E5A, // positive normal
0xAE44, // negative normal
0xFF84, // negative normal
0x7F7F, // max positive normal
0xFF7F, // min negative normal
};

std::initializer_list<uint32_t> ref_vals = {
0x0, 0x80000000, 0x10000, 0x7F0000, 0x5A0000, 0x80010000,
0x807F0000, 0x805A0000, 0x7F800000, 0xFF800000, 0x2E050000, 0x7E5A0000,
0xAE440000, 0xFF840000, 0x7F7F0000, 0xFF7F0000};

test(device_queue, input_vals, ref_vals,
FT12(sycl_bfloat16, uint32_t, sycl_imf::bfloat162float));
}
}
74 changes: 74 additions & 0 deletions SYCL/DeviceLib/imf_fp32_to_bf16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
//
// UNSUPPORTED: cuda || hip

// All __imf_* bf16 functions are implemented via fp32 emulation, so we don't
// need to check whether underlying device supports bf16 or not.
#include "imf_utils.hpp"
#include <sycl/ext/intel/math.hpp>
using sycl_bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
namespace sycl_imf = sycl::ext::intel::math;
int main() {
sycl::queue device_queue(sycl::default_selector_v);
std::cout << "Running on "
<< device_queue.get_device().get_info<sycl::info::device::name>()
<< "\n";

{
std::initializer_list<uint32_t> input_vals = {
0x0, // +0
0x80000000, // -0
0x1, // min positive subnormal
0x7FFFFF, // max positive subnormal
0x5A6BFC, // positive subnormal
0x80000001, // max negative subnormal
0x807FFFFF, // min negative subnormal
0x805A6FED, // negative subnormal
0x7F800000, // +inf
0xFF800000, // -inf
0x2E05CBA9, // positive normal
0x7E5A8935, // positive normal
0xAE4411FC, // negative normal
0xFA84C773, // negative normal
0x7F7FFFFF, // max positive normal
0x765FCEED, // positive normal
0xFF7FFFFF, // min negative normal
0xAC763561, // negative normal
};

std::initializer_list<uint16_t> ref_vals = {
0x0, 0x8000, 0x0, 0x80, 0x5a, 0x8000, 0x8080, 0x805A, 0x7F80,
0xFF80, 0x2E06, 0x7E5B, 0xAE44, 0xFA85, 0x7F80, 0x7660, 0xFF80, 0xAC76};

std::initializer_list<uint16_t> ref_vals_rd = {
0x0, 0x8000, 0x0, 0x7F, 0x5A, 0x8001, 0x8080, 0x805B, 0x7F80,
0xFF80, 0x2E05, 0x7E5A, 0xAE45, 0xFA85, 0x7F7F, 0x765F, 0xFF80, 0xAC77};

std::initializer_list<uint16_t> ref_vals_ru = {
0x0, 0x8000, 0x1, 0x80, 0x5B, 0x8000, 0x807F, 0x805A, 0x7F80,
0xFF80, 0x2E06, 0x7E5B, 0xAE44, 0xFA84, 0x7F80, 0x7660, 0xFF7F, 0xAC76};

std::initializer_list<uint16_t> ref_vals_rz = {
0x0, 0x8000, 0x0, 0x7F, 0x5A, 0x8000, 0x807F, 0x805A, 0x7F80,
0xFF80, 0x2E05, 0x7E5A, 0xAE44, 0xFA84, 0x7F7F, 0x765F, 0xFF7F, 0xAC76};

test(device_queue, input_vals, ref_vals,
FT12(float, uint16_t, sycl_imf::float2bfloat16));
test(device_queue, input_vals, ref_vals_rd,
FT12(float, uint16_t, sycl_imf::float2bfloat16_rd));
test(device_queue, input_vals, ref_vals,
FT12(float, uint16_t, sycl_imf::float2bfloat16_rn));
test(device_queue, input_vals, ref_vals_ru,
FT12(float, uint16_t, sycl_imf::float2bfloat16_ru));
test(device_queue, input_vals, ref_vals_rz,
FT12(float, uint16_t, sycl_imf::float2bfloat16_rz));
}
}
16 changes: 16 additions & 0 deletions SYCL/DeviceLib/imf_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,23 @@ void test3(sycl::queue &q, std::initializer_list<InputTy1> Input1,

#define F(Name) [](auto x) { return (Name)(x); }
#define FT(T, Name) [](auto x) { return __builtin_bit_cast(T, (Name)(x)); }
// Used for float2bf16 tests, all uint32_t input converted to float, then pass
// to __imf_float2bfloat16* utils.
#define FT1(T, Name) [](auto x) { return (Name)(__builtin_bit_cast(T, x)); }
#define FT12(T1, T2, Name) \
[](auto x) { \
return __builtin_bit_cast(T2, (Name)(__builtin_bit_cast(T1, x))); \
}
#define F2(Name) [](auto x, auto y) { return (Name)(x, y); }
#define FT2(T, Name) \
[](auto x, auto y) { \
return (Name)(__builtin_bit_cast(T, x), __builtin_bit_cast(T, y)); \
}
#define FT22(T1, T2, Name) \
[](auto x, auto y) { \
return __builtin_bit_cast( \
T2, (Name)(__builtin_bit_cast(T1, x), __builtin_bit_cast(T1, y))); \
}
#define F3(Name) [](auto x, auto y, auto z) { return (Name)(x, y, z); }
#if defined(__SPIR__)
#define F_Half1(Name) \
Expand Down