diff --git a/SYCL/BFloat16/bfloat16_builtins.cpp b/SYCL/BFloat16/bfloat16_builtins.cpp
index 9ccf988492d22..262550c5ed930 100644
--- a/SYCL/BFloat16/bfloat16_builtins.cpp
+++ b/SYCL/BFloat16/bfloat16_builtins.cpp
@@ -12,7 +12,7 @@
 #include <sycl/sycl.hpp>
 
 using namespace sycl;
-using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi;
 
 constexpr int N = 60; // divisible by all tested array sizes
 constexpr float bf16_eps = 0.00390625;
@@ -222,7 +222,7 @@ bool check(float a, float b) {
 
 int main() {
   queue q;
-  if (q.get_device().has(aspect::ext_oneapi_bfloat16)) {
+  if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions)) {
     std::vector<float> a(N), b(N), c(N);
     int err = 0;
 
diff --git a/SYCL/BFloat16/bfloat16_conversions.cpp b/SYCL/BFloat16/bfloat16_conversions.cpp
new file mode 100755
index 0000000000000..ed5d7ae583a09
--- /dev/null
+++ b/SYCL/BFloat16/bfloat16_conversions.cpp
@@ -0,0 +1,70 @@
+// UNSUPPORTED: hip
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
+// Currently the feature is supported only on CPU and GPU, natively or by
+// software emulation.
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUNx: %ACC_RUN_PLACEHOLDER %t.out
+
+//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+template <typename T> T calculate(T a, T b) {
+  sycl::ext::oneapi::bfloat16 x = -a;
+  sycl::ext::oneapi::bfloat16 y = b;
+  sycl::ext::oneapi::bfloat16 z = x + y;
+  T result = z;
+  return result;
+}
+
+template <typename T> int test_device(queue Q) {
+  T data[3] = {-7.0f, 8.1f, 0.0f};
+
+  buffer<T, 1> buf{data, 3};
+  Q.submit([&](handler &cgh) {
+    accessor numbers{buf, cgh, read_write};
+    cgh.single_task([=]() { numbers[2] = calculate(numbers[0], numbers[1]); });
+  });
+
+  host_accessor hostOutAcc{buf, read_only};
+  std::cout << "Device Result = " << hostOutAcc[2] << std::endl;
+  if (hostOutAcc[2] == 15.125f)
+    return 0;
+  return 1;
+}
+
+template <typename T> int test_host() {
+  T a{-5.6f};
+  T b{-1.1f};
+  T result = calculate(a, b);
+  std::cout << "Host Result = " << result << std::endl;
+  if (result == 4.5f)
+    return 0;
+  return 1;
+}
+
+int main() {
+  queue Q;
+  int result;
+  result = test_host<float>();
+  result |= test_host<sycl::half>();
+  if (Q.get_device().has(aspect::fp16))
+    result |= test_device<sycl::half>(Q);
+  result |= test_device<float>(Q);
+  if (result)
+    std::cout << "FAIL\n";
+  else
+    std::cout << "PASS\n";
+
+  return result;
+}
diff --git a/SYCL/BFloat16/bfloat16_example.cpp b/SYCL/BFloat16/bfloat16_example.cpp
new file mode 100755
index 0000000000000..fab3795679b9d
--- /dev/null
+++ b/SYCL/BFloat16/bfloat16_example.cpp
@@ -0,0 +1,83 @@
+///
+/// Check if bfloat16 example works using fallback libraries
+///
+
+// REQUIRES: opencl-aot, ocloc, cpu, gpu-intel-gen9
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
+
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using sycl::ext::oneapi::bfloat16;
+
+float foo(float a, float b) {
+  // Convert from float to bfloat16.
+  bfloat16 A{a};
+  bfloat16 B{b};
+
+  // Convert A and B from bfloat16 to float, do addition on floating-point
+  // numbers, then convert the result to bfloat16 and store it in C.
+  bfloat16 C = A + B;
+
+  // Return the result converted from bfloat16 to float.
+  return C;
+}
+
+int main(int argc, char *argv[]) {
+  float data[3] = {7.0f, 8.1f, 0.0f};
+
+  float result_host = foo(7.0f, 8.1f);
+  std::cout << "CPU Result = " << result_host << std::endl;
+  if (std::abs(15.1f - result_host) > 0.1f) {
+    std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl;
+    return 1;
+  }
+
+  queue deviceQueue;
+  buffer<float, 1> buf{data, 3};
+
+  deviceQueue.submit([&](handler &cgh) {
+    accessor numbers{buf, cgh, read_write};
+    cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
+  });
+
+  host_accessor hostOutAcc{buf, read_only};
+  float result_device = hostOutAcc[2];
+  std::cout << "GPU Result = " << result_device << std::endl;
+  if (std::abs(result_host - result_device) > 0.1f) {
+    std::cout << "Test failed. CPU Result !~= GPU result" << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp
index 25bb8ac15cf70..28f1bf621b670 100644
--- a/SYCL/BFloat16/bfloat16_type.cpp
+++ b/SYCL/BFloat16/bfloat16_type.cpp
@@ -1,10 +1,11 @@
-// UNSUPPORTED: cuda || hip
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// TODO currently the feature isn't supported on most of the devices
-// need to enable the test when the aspect and device_if feature are
-// introduced
-// RUNx: %CPU_RUN_PLACEHOLDER %t.out
-// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+// UNSUPPORTED: hip
+// RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %}
+// TODO enable the below when CI supports >=sm_80
+// RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %}
+// RUN: %clangxx -fsycl %s -o %t.out
+// TODO currently the feature isn't supported on FPGA.
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUNx: %ACC_RUN_PLACEHOLDER %t.out
 
 //==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp
index 97e1ccb7fcf01..570755bf677af 100644
--- a/SYCL/BFloat16/bfloat16_type.hpp
+++ b/SYCL/BFloat16/bfloat16_type.hpp
@@ -1,5 +1,5 @@
 #include <sycl/sycl.hpp>
-#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
+#include <sycl/ext/oneapi/bfloat16.hpp>
 
 #include <cmath>
 #include <iostream>
@@ -11,8 +11,7 @@ constexpr size_t N = 100;
 template <typename T> void assert_close(const T &C, const float ref) {
   for (size_t i = 0; i < N; i++) {
     auto diff = C[i] - ref;
-    assert(std::fabs(static_cast<float>(diff)) <
-           std::numeric_limits<float>::epsilon());
+    assert(std::fabs(static_cast<float>(diff)) < 0.1);
   }
 }
 
@@ -21,7 +20,7 @@ void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
   q.submit([&](handler &cgh) {
     auto A = a.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
       A[index] = AVal;
     });
   });
@@ -34,9 +33,8 @@ void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
   q.submit([&](handler &cgh) {
     auto A = a.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      uint16_t AVal =
-          sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]);
-      A[index] = sycl::ext::oneapi::experimental::bfloat16::to_float(AVal);
+      sycl::ext::oneapi::bfloat16 AVal = A[index];
+      A[index] = float(AVal);
     });
   });
 
@@ -52,9 +50,9 @@ void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal + BVal;
       C[index] = CVal;
     });
   });
@@ -71,9 +69,9 @@ void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal - BVal;
       C[index] = CVal;
     });
   });
@@ -88,8 +86,8 @@ void verify_minus(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
     auto A = a.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 CVal = -AVal;
       C[index] = CVal;
     });
   });
@@ -106,9 +104,9 @@ void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal * BVal;
       C[index] = CVal;
     });
   });
@@ -125,9 +123,9 @@ void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal / BVal;
       C[index] = CVal;
     });
   });
@@ -144,19 +142,18 @@ void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
     auto B = b.get_access(cgh);
     auto C = c.get_access(cgh);
     cgh.parallel_for(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
       if (AVal) {
         if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
             !BVal) {
-          sycl::ext::oneapi::experimental::bfloat16 CVal =
-              AVal != BVal ? AVal : BVal;
+          sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal;
           CVal--;
           CVal++;
           if (AVal == BVal) {
             CVal -= AVal;
-            CVal *= 3.0;
-            CVal /= 2.0;
+            CVal *= 3.0f;
+            CVal /= 2.0f;
           } else
             CVal += BVal;
           C[index] = CVal;
@@ -179,9 +176,9 @@ int run_tests() {
     return 0;
   }
 
-  std::vector<float> vec_a(N, 5.0);
-  std::vector<float> vec_b(N, 2.0);
-  std::vector<float> vec_b_neg(N, -2.0);
+  std::vector<float> vec_a(N, 5.0f);
+  std::vector<float> vec_b(N, 2.0f);
+  std::vector<float> vec_b_neg(N, -2.0f);
 
   range<1> r(N);
   buffer<float, 1> a{vec_a.data(), r};
@@ -190,19 +187,32 @@ int run_tests() {
 
   queue q{dev};
 
-  verify_conv_implicit(q, a, r, 5.0);
-  verify_conv_explicit(q, a, r, 5.0);
-  verify_add(q, a, b, r, 7.0);
-  verify_sub(q, a, b, r, 3.0);
-  verify_mul(q, a, b, r, 10.0);
-  verify_div(q, a, b, r, 2.5);
-  verify_logic(q, a, b, r, 7.0);
-  verify_add(q, a, b_neg, r, 3.0);
-  verify_sub(q, a, b_neg, r, 7.0);
-  verify_minus(q, a, r, -5.0);
-  verify_mul(q, a, b_neg, r, -10.0);
-  verify_div(q, a, b_neg, r, -2.5);
-  verify_logic(q, a, b_neg, r, 3.0);
+  verify_conv_implicit(q, a, r, 5.0f);
+  std::cout << "PASS verify_conv_implicit\n";
+  verify_conv_explicit(q, a, r, 5.0f);
+  std::cout << "PASS verify_conv_explicit\n";
+  verify_add(q, a, b, r, 7.0f);
+  std::cout << "PASS verify_add\n";
+  verify_sub(q, a, b, r, 3.0f);
+  std::cout << "PASS verify_sub\n";
+  verify_mul(q, a, b, r, 10.0f);
+  std::cout << "PASS verify_mul\n";
+  verify_div(q, a, b, r, 2.5f);
+  std::cout << "PASS verify_div\n";
+  verify_logic(q, a, b, r, 7.0f);
+  std::cout << "PASS verify_logic\n";
+  verify_add(q, a, b_neg, r, 3.0f);
+  std::cout << "PASS verify_add\n";
+  verify_sub(q, a, b_neg, r, 7.0f);
+  std::cout << "PASS verify_sub\n";
+  verify_minus(q, a, r, -5.0f);
+  std::cout << "PASS verify_minus\n";
+  verify_mul(q, a, b_neg, r, -10.0f);
+  std::cout << "PASS verify_mul\n";
+  verify_div(q, a, b_neg, r, -2.5f);
+  std::cout << "PASS verify_div\n";
+  verify_logic(q, a, b_neg, r, 3.0f);
+  std::cout << "PASS verify_logic\n";
 
   return 0;
 }
diff --git a/SYCL/BFloat16/bfloat16_type_cuda.cpp b/SYCL/BFloat16/bfloat16_type_cuda.cpp
index 30d1f122a2534..81c4a08f128e4 100644
--- a/SYCL/BFloat16/bfloat16_type_cuda.cpp
+++ b/SYCL/BFloat16/bfloat16_type_cuda.cpp
@@ -12,13 +12,4 @@
 
 #include "bfloat16_type.hpp"
 
-int main() {
-  bool has_bfloat16_aspect = false;
-  for (const auto &plt : sycl::platform::get_platforms()) {
-    if (plt.has(aspect::ext_oneapi_bfloat16))
-      has_bfloat16_aspect = true;
-  }
-
-  if (has_bfloat16_aspect)
-    return run_tests();
-}
+int main() { return run_tests(); }
diff --git a/SYCL/BFloat16/bfloat_hw.cpp b/SYCL/BFloat16/bfloat_hw.cpp
index 29d63c7fa9b3e..bb1a2a1e1f7b9 100644
--- a/SYCL/BFloat16/bfloat_hw.cpp
+++ b/SYCL/BFloat16/bfloat_hw.cpp
@@ -16,7 +16,7 @@ using get_uint_type_of_size = typename std::conditional_t<
     std::conditional_t>>>;
 
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using Bfloat16StorageT = get_uint_type_of_size<2>;
 
 bool test(float Val, Bfloat16StorageT Bits) {
diff --git a/SYCL/Basic/aspects.cpp b/SYCL/Basic/aspects.cpp
index 2f9938b58d018..bb1adf0bbdf1e 100644
--- a/SYCL/Basic/aspects.cpp
+++ b/SYCL/Basic/aspects.cpp
@@ -54,8 +54,8 @@ int main() {
     if (plt.has(aspect::fp64)) {
      std::cout << " fp64" << std::endl;
     }
-    if (plt.has(aspect::ext_oneapi_bfloat16)) {
-      std::cout << " ext_oneapi_bfloat16" << std::endl;
+    if (plt.has(aspect::ext_oneapi_bfloat16_math_functions)) {
+      std::cout << " ext_oneapi_bfloat16_math_functions" << std::endl;
     }
     if (plt.has(aspect::int64_base_atomics)) {
      std::cout << " base atomic operations" << std::endl;
diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
index ca0da71199d67..e150081005512 100644
--- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
+++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
@@ -30,7 +30,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template class TestID;
diff --git a/SYCL/ESIMD/api/replicate_smoke.cpp b/SYCL/ESIMD/api/replicate_smoke.cpp
index 159b842a94be3..d8d4d1eb9baeb 100644
--- a/SYCL/ESIMD/api/replicate_smoke.cpp
+++ b/SYCL/ESIMD/api/replicate_smoke.cpp
@@ -23,7 +23,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template struct char_to_int {
diff --git a/SYCL/ESIMD/api/simd_copy_to_from.cpp b/SYCL/ESIMD/api/simd_copy_to_from.cpp
index 95be590cb73b8..d3f21cce9ee86 100644
--- a/SYCL/ESIMD/api/simd_copy_to_from.cpp
+++ b/SYCL/ESIMD/api/simd_copy_to_from.cpp
@@ -38,7 +38,7 @@
 using namespace sycl;
 using namespace sycl::ext::intel;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template
diff --git a/SYCL/ESIMD/api/simd_subscript_operator.cpp b/SYCL/ESIMD/api/simd_subscript_operator.cpp
index d081640de4aa9..79d1c75482e02 100644
--- a/SYCL/ESIMD/api/simd_subscript_operator.cpp
+++ b/SYCL/ESIMD/api/simd_subscript_operator.cpp
@@ -24,7 +24,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template bool test(queue &q) {
diff --git a/SYCL/ESIMD/api/simd_view_subscript_operator.cpp b/SYCL/ESIMD/api/simd_view_subscript_operator.cpp
index f9f81db148db5..c7da212153581 100644
--- a/SYCL/ESIMD/api/simd_view_subscript_operator.cpp
+++ b/SYCL/ESIMD/api/simd_view_subscript_operator.cpp
@@ -25,7 +25,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template class TestID;
diff --git a/SYCL/ESIMD/api/svm_gather_scatter.cpp b/SYCL/ESIMD/api/svm_gather_scatter.cpp
index 09cf3e9d30b70..0775389908998 100644
--- a/SYCL/ESIMD/api/svm_gather_scatter.cpp
+++ b/SYCL/ESIMD/api/svm_gather_scatter.cpp
@@ -25,7 +25,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 using tfloat32 = sycl::ext::intel::experimental::esimd::tfloat32;
 
 template bool test(queue &Q) {
diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp
index 71206d0ed5db0..51891e1da674c 100644
--- a/SYCL/ESIMD/api/unary_ops_heavy.cpp
+++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp
@@ -30,7 +30,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 
 template class TestID;
diff --git a/SYCL/ESIMD/esimd_test_utils.hpp b/SYCL/ESIMD/esimd_test_utils.hpp
index 07c03a32e496f..06df7b4dff522 100644
--- a/SYCL/ESIMD/esimd_test_utils.hpp
+++ b/SYCL/ESIMD/esimd_test_utils.hpp
@@ -553,7 +553,7 @@ TID(uint32_t)
 TID(int64_t)
 TID(uint64_t)
 TID(half)
-TID(sycl::ext::oneapi::experimental::bfloat16)
+TID(sycl::ext::oneapi::bfloat16)
 TID(sycl::ext::intel::experimental::esimd::tfloat32)
 TID(float)
 TID(double)
diff --git a/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp b/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp
index a3661cab569c1..7d0c7dcc5f5d6 100644
--- a/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp
+++ b/SYCL/KernelAndProgram/kernel-bundle-merge-options.hpp
@@ -35,4 +35,6 @@ int main() {
   } catch (...) {
     // Ignore all exceptions
   }
+
+  return 0;
 }
diff --git a/SYCL/Matrix/element_wise_all_ops_cuda.cpp b/SYCL/Matrix/element_wise_all_ops_cuda.cpp
index bc11434cef7e6..35cbef3632b5d 100644
--- a/SYCL/Matrix/element_wise_all_ops_cuda.cpp
+++ b/SYCL/Matrix/element_wise_all_ops_cuda.cpp
@@ -15,7 +15,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::oneapi::experimental::matrix;
-using sycl::ext::oneapi::experimental::bfloat16;
+using sycl::ext::oneapi::bfloat16;
 
 #define SG_SZ 32
 constexpr size_t nWGperDim = 2;
diff --git a/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp b/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp
index 71a7f9fa668b0..5222b8d0255c4 100644
--- a/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp
+++ b/SYCL/Matrix/element_wise_all_ops_cuda_legacy.cpp
@@ -14,7 +14,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::oneapi::experimental::matrix;
-using sycl::ext::oneapi::experimental::bfloat16;
+using sycl::ext::oneapi::bfloat16;
 
 #define SG_SZ 32
 constexpr size_t nWGperDim = 2;
diff --git a/SYCL/Matrix/element_wise_wi_marray.cpp b/SYCL/Matrix/element_wise_wi_marray.cpp
index 0464f79f57561..558451a3dbcae 100644
--- a/SYCL/Matrix/element_wise_wi_marray.cpp
+++ b/SYCL/Matrix/element_wise_wi_marray.cpp
@@ -14,7 +14,7 @@
 #include <sycl/sycl.hpp>
 
 using namespace sycl;
-using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi;
 using namespace sycl::ext::oneapi::experimental::matrix;
 
 #define SG_SZ 32
diff --git a/SYCL/Matrix/joint_matrix_bfloat16.cpp b/SYCL/Matrix/joint_matrix_bfloat16.cpp
index 4d35b5f5a53a4..e665617156f0b 100644
--- a/SYCL/Matrix/joint_matrix_bfloat16.cpp
+++ b/SYCL/Matrix/joint_matrix_bfloat16.cpp
@@ -16,7 +16,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::oneapi::experimental::matrix;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 
 #define SG_SZ 16
 
diff --git a/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp b/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp
index 483b15b567f4a..456b040b3659f 100644
--- a/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp
+++ b/SYCL/Matrix/joint_matrix_bfloat16_32x64.cpp
@@ -18,7 +18,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::oneapi::experimental::matrix;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 
 #define SG_SZ 16
 
@@ -144,13 +144,13 @@ int main() {
     for (int j = 0; j < MATRIX_K; j++) {
      // bfloat16 is created using unsigned short since conversion from float to
      // bfloat16 is not supported on the host side yet
-      A[i][j] = bfloat16::from_bits(make_bf16(1.0f * (i + j)));
+      A[i][j] = make_bf16(1.0f * (i + j));
       Aref[i][j] = make_bf16(1.0f * (i + j));
     }
   }
   for (int i = 0; i < MATRIX_K / 2; i++) {
     for (int j = 0; j < MATRIX_N * 2; j++) {
-      B[i][j] = bfloat16::from_bits((make_bf16(2.0f * i + 3.0f * j)));
+      B[i][j] = make_bf16(2.0f * i + 3.0f * j);
       Bref[i][j] = make_bf16(2.0f * i + 3.0f * j);
     }
   }
diff --git a/SYCL/Matrix/joint_matrix_bfloat16_use.cpp b/SYCL/Matrix/joint_matrix_bfloat16_use.cpp
index 5c110336e3fa7..aa6412195e107 100644
--- a/SYCL/Matrix/joint_matrix_bfloat16_use.cpp
+++ b/SYCL/Matrix/joint_matrix_bfloat16_use.cpp
@@ -17,7 +17,7 @@
 #include <sycl/sycl.hpp>
 
 using namespace sycl::ext::oneapi::experimental::matrix;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+using bfloat16 = sycl::ext::oneapi::bfloat16;
 
 #define SG_SZ 16
 
diff --git a/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp b/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp
index d98a389f1ede9..84b399fadf152 100644
--- a/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp
+++ b/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp
@@ -11,7 +11,7 @@
 #include <sycl/sycl.hpp>
 
 using namespace sycl;
-using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi;
 using namespace sycl::ext::oneapi::experimental::matrix;
 
 constexpr float bf16_eps = 0.00390625;
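
The user-visible change exercised throughout this patch is the namespace move (sycl::ext::oneapi::experimental::bfloat16 becomes sycl::ext::oneapi::bfloat16) together with implicit float conversions replacing the old from_float/to_float/from_bits helpers. A minimal host-side sketch of the updated API, mirroring the 15.125 expectation checked in bfloat16_conversions.cpp above (this file is illustrative only and not part of the patch; it assumes a DPC++ compiler recent enough to ship the non-experimental bfloat16):

// bfloat16_namespace_sketch.cpp -- illustrative, not part of this patch.
// Shows implicit float -> bfloat16 construction and bfloat16 -> float
// conversion via the non-experimental sycl::ext::oneapi::bfloat16 type.
#include <iostream>
#include <sycl/sycl.hpp>

using bfloat16 = sycl::ext::oneapi::bfloat16;

int main() {
  bfloat16 a = 7.0f; // exact in bfloat16
  bfloat16 b = 8.1f; // rounds to 8.125 (bfloat16 keeps an 8-bit mantissa)
  float sum = a + b; // operands widen to float for the add, result narrows
  std::cout << "sum = " << sum << "\n";
  // 7.0 + 8.125 == 15.125, the same value bfloat16_conversions.cpp checks.
  return sum == 15.125f ? 0 : 1;
}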