[SYCL] Test corrections after moving bfloat16 support out of experimental status. (intel/llvm-test-suite#1129)

Test changes for intel#6524

Signed-off-by: Rajiv Deodhar <rajiv.deodhar@intel.com>
Co-authored-by: JackAKirk <jack.kirk@codeplay.com>
rdeodhar and JackAKirk authored Nov 28, 2022
1 parent 07733a8 commit 74100da
Showing 24 changed files with 239 additions and 82 deletions.
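
For context, a minimal standalone sketch (not part of this commit) of the API surface these test updates track: the bfloat16 class is now used from sycl::ext::oneapi instead of sycl::ext::oneapi::experimental, and the math-builtin tests are gated on aspect::ext_oneapi_bfloat16_math_functions rather than aspect::ext_oneapi_bfloat16. The namespace and aspect names are taken from the diffs below; the rest is illustrative.

#include <iostream>
#include <sycl/sycl.hpp>

using namespace sycl;
// Formerly sycl::ext::oneapi::experimental::bfloat16.
using bf16 = sycl::ext::oneapi::bfloat16;

int main() {
  queue q;

  // The bfloat16 math-builtin tests are now guarded by this renamed aspect.
  if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions))
    std::cout << "bfloat16 math functions available on this device\n";

  // Conversions and arithmetic go through float, as the tests below exercise.
  bf16 a{5.0f}, b{2.0f};
  float sum = a + b; // 7.0, exactly representable in bfloat16
  std::cout << "sum = " << sum << "\n";
  return 0;
}
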
4 changes: 2 additions & 2 deletions SYCL/BFloat16/bfloat16_builtins.cpp
@@ -12,7 +12,7 @@
#include <vector>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental;
using namespace sycl::ext::oneapi;

constexpr int N = 60; // divisible by all tested array sizes
constexpr float bf16_eps = 0.00390625;
@@ -222,7 +222,7 @@ bool check(float a, float b) {
int main() {
queue q;

if (q.get_device().has(aspect::ext_oneapi_bfloat16)) {
if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions)) {
std::vector<float> a(N), b(N), c(N);
int err = 0;

70 changes: 70 additions & 0 deletions SYCL/BFloat16/bfloat16_conversions.cpp
@@ -0,0 +1,70 @@
// UNSUPPORTED: hip
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
// Currently the feature is supported only on CPU and GPU, natively or by
// software emulation.
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//

#include <iostream>
#include <sycl/sycl.hpp>

using namespace sycl;

template <typename T> T calculate(T a, T b) {
sycl::ext::oneapi::bfloat16 x = -a;
sycl::ext::oneapi::bfloat16 y = b;
sycl::ext::oneapi::bfloat16 z = x + y;
T result = z;
return result;
}

template <typename T> int test_device(queue Q) {
T data[3] = {-7.0f, 8.1f, 0.0f};

buffer<T, 1> buf{data, 3};
Q.submit([&](handler &cgh) {
accessor numbers{buf, cgh, read_write};
cgh.single_task([=]() { numbers[2] = calculate(numbers[0], numbers[1]); });
});

host_accessor hostOutAcc{buf, read_only};
std::cout << "Device Result = " << hostOutAcc[2] << std::endl;
if (hostOutAcc[2] == 15.125f)
return 0;
return 1;
}

template <typename T> int test_host() {
T a{-5.6f};
T b{-1.1f};
T result = calculate(a, b);
std::cout << "Host Result = " << result << std::endl;
if (result == 4.5f)
return 0;
return 1;
}

int main() {
queue Q;
int result;
result = test_host<sycl::half>();
result |= test_host<float>();
if (Q.get_device().has(aspect::fp16))
result |= test_device<sycl::half>(Q);
result |= test_device<float>(Q);
if (result)
std::cout << "FAIL\n";
else
std::cout << "PASS\n";

return result;
}
83 changes: 83 additions & 0 deletions SYCL/BFloat16/bfloat16_example.cpp
@@ -0,0 +1,83 @@
///
/// Check if bfloat16 example works using fallback libraries
///

// REQUIRES: opencl-aot, ocloc, cpu, gpu-intel-gen9
// UNSUPPORTED: cuda
// CUDA is not compatible with SPIR.

// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

#include <sycl/sycl.hpp>

using namespace sycl;
using sycl::ext::oneapi::bfloat16;

float foo(float a, float b) {
// Convert from float to bfloat16.
bfloat16 A{a};
bfloat16 B{b};

// Convert A and B from bfloat16 to float, do addition on floating-point
// numbers, then convert the result to bfloat16 and store it in C.
bfloat16 C = A + B;

// Return the result converted from bfloat16 to float.
return C;
}

int main(int argc, char *argv[]) {
float data[3] = {7.0f, 8.1f, 0.0f};

float result_host = foo(7.0f, 8.1f);
std::cout << "CPU Result = " << result_host << std::endl;
if (std::abs(15.1f - result_host) > 0.1f) {
std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl;
return 1;
}

queue deviceQueue;
buffer<float, 1> buf{data, 3};

deviceQueue.submit([&](handler &cgh) {
accessor numbers{buf, cgh, read_write};
cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
});

host_accessor hostOutAcc{buf, read_only};
float result_device = hostOutAcc[2];
std::cout << "GPU Result = " << result_device << std::endl;
if (std::abs(result_host - result_device) > 0.1f) {
std::cout << "Test failed. CPU Result !~= GPU result" << std::endl;
return 1;
}

return 0;
}
15 changes: 8 additions & 7 deletions SYCL/BFloat16/bfloat16_type.cpp
@@ -1,10 +1,11 @@
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// TODO currently the feature isn't supported on most of the devices
// need to enable the test when the aspect and device_if feature are
// introduced
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// UNSUPPORTED: hip
// RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %}
// TODO enable the below when CI supports >=sm_80
// RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %}
// RUN: %clangxx -fsycl %s -o %t.out
// TODO currently the feature isn't supported on FPGA.
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
96 changes: 53 additions & 43 deletions SYCL/BFloat16/bfloat16_type.hpp
@@ -1,5 +1,5 @@
#include <iostream>
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>
#include <sycl/sycl.hpp>

#include <cmath>
@@ -11,8 +11,7 @@ constexpr size_t N = 100;
template <typename T> void assert_close(const T &C, const float ref) {
for (size_t i = 0; i < N; i++) {
auto diff = C[i] - ref;
assert(std::fabs(static_cast<float>(diff)) <
std::numeric_limits<float>::epsilon());
assert(std::fabs(static_cast<float>(diff)) < 0.1);
}
}

@@ -21,7 +20,7 @@ void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
q.submit([&](handler &cgh) {
auto A = a.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 AVal{A[index]};
A[index] = AVal;
});
});
@@ -34,9 +33,8 @@ void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
q.submit([&](handler &cgh) {
auto A = a.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
uint16_t AVal =
sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]);
A[index] = sycl::ext::oneapi::experimental::bfloat16::to_float(AVal);
sycl::ext::oneapi::bfloat16 AVal = A[index];
A[index] = float(AVal);
});
});

@@ -52,9 +50,9 @@ void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal + BVal;
C[index] = CVal;
});
});
@@ -71,9 +69,9 @@ void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal - BVal;
C[index] = CVal;
});
});
@@ -88,8 +86,8 @@ void verify_minus(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
auto A = a.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_minus>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 CVal = -AVal;
C[index] = CVal;
});
});
@@ -106,9 +104,9 @@ void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal * BVal;
C[index] = CVal;
});
});
@@ -125,9 +123,9 @@ void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal / BVal;
C[index] = CVal;
});
});
@@ -144,19 +142,18 @@ void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class logic>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
if (AVal) {
if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
!BVal) {
sycl::ext::oneapi::experimental::bfloat16 CVal =
AVal != BVal ? AVal : BVal;
sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal;
CVal--;
CVal++;
if (AVal == BVal) {
CVal -= AVal;
CVal *= 3.0;
CVal /= 2.0;
CVal *= 3.0f;
CVal /= 2.0f;
} else
CVal += BVal;
C[index] = CVal;
@@ -179,9 +176,9 @@ int run_tests() {
return 0;
}

std::vector<float> vec_a(N, 5.0);
std::vector<float> vec_b(N, 2.0);
std::vector<float> vec_b_neg(N, -2.0);
std::vector<float> vec_a(N, 5.0f);
std::vector<float> vec_b(N, 2.0f);
std::vector<float> vec_b_neg(N, -2.0f);

range<1> r(N);
buffer<float, 1> a{vec_a.data(), r};
@@ -190,19 +187,32 @@

queue q{dev};

verify_conv_implicit(q, a, r, 5.0);
verify_conv_explicit(q, a, r, 5.0);
verify_add(q, a, b, r, 7.0);
verify_sub(q, a, b, r, 3.0);
verify_mul(q, a, b, r, 10.0);
verify_div(q, a, b, r, 2.5);
verify_logic(q, a, b, r, 7.0);
verify_add(q, a, b_neg, r, 3.0);
verify_sub(q, a, b_neg, r, 7.0);
verify_minus(q, a, r, -5.0);
verify_mul(q, a, b_neg, r, -10.0);
verify_div(q, a, b_neg, r, -2.5);
verify_logic(q, a, b_neg, r, 3.0);
verify_conv_implicit(q, a, r, 5.0f);
std::cout << "PASS verify_conv_implicit\n";
verify_conv_explicit(q, a, r, 5.0f);
std::cout << "PASS verify_conv_explicit\n";
verify_add(q, a, b, r, 7.0f);
std::cout << "PASS verify_add\n";
verify_sub(q, a, b, r, 3.0f);
std::cout << "PASS verify_sub\n";
verify_mul(q, a, b, r, 10.0f);
std::cout << "PASS verify_mul\n";
verify_div(q, a, b, r, 2.5f);
std::cout << "PASS verify_div\n";
verify_logic(q, a, b, r, 7.0f);
std::cout << "PASS verify_logic\n";
verify_add(q, a, b_neg, r, 3.0f);
std::cout << "PASS verify_add\n";
verify_sub(q, a, b_neg, r, 7.0f);
std::cout << "PASS verify_sub\n";
verify_minus(q, a, r, -5.0f);
std::cout << "PASS verify_minus\n";
verify_mul(q, a, b_neg, r, -10.0f);
std::cout << "PASS verify_mul\n";
verify_div(q, a, b_neg, r, -2.5f);
std::cout << "PASS verify_div\n";
verify_logic(q, a, b_neg, r, 3.0f);
std::cout << "PASS verify_logic\n";

return 0;
}
11 changes: 1 addition & 10 deletions SYCL/BFloat16/bfloat16_type_cuda.cpp
@@ -12,13 +12,4 @@

#include "bfloat16_type.hpp"

int main() {
bool has_bfloat16_aspect = false;
for (const auto &plt : sycl::platform::get_platforms()) {
if (plt.has(aspect::ext_oneapi_bfloat16))
has_bfloat16_aspect = true;
}

if (has_bfloat16_aspect)
return run_tests();
}
int main() { return run_tests(); }