Skip to content

Commit

Permalink
exclude call of div() with MKL 2023.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
antonwolfy committed Jun 12, 2023
1 parent aa09ab5 commit 1b4b671
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 65 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ jobs:
# TODO: run the whole scope once the issues on CPU are resolved
- name: Run tests
run: |
python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
working-directory: ${{ env.tests-path }}
env:
SYCL_QUEUE_THREAD_POOL_SIZE: 16
SYCL_QUEUE_THREAD_POOL_SIZE: 6

test_windows:
name: Test ['windows-latest', python='${{ matrix.python }}']
Expand Down Expand Up @@ -333,10 +333,10 @@ jobs:
# TODO: run the whole scope once the issues on CPU are resolved
- name: Run tests
run: |
python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }}
python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }}
working-directory: ${{ env.tests-path }}
env:
SYCL_QUEUE_THREAD_POOL_SIZE: 16
SYCL_QUEUE_THREAD_POOL_SIZE: 6

upload:
name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}']
Expand Down
13 changes: 0 additions & 13 deletions dpnp/backend/extensions/vm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ else()
target_compile_options(${python_module_name} PRIVATE
-fno-approx-func
-fno-finite-math-only
-no-ipo
)
target_link_options(${python_module_name} PRIVATE -no-ipo)
endif()

target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
Expand All @@ -72,17 +70,6 @@ endif()

target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)

target_link_libraries(${python_module_name} PUBLIC oneDPL)

if (UNIX)
# needed for STL headers with GCC < 11
target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0)
endif()

target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
# work-around for Windows at exit crash with predefined policies
target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)

install(TARGETS ${python_module_name}
DESTINATION "dpnp/backend/extensions/vm"
)
66 changes: 18 additions & 48 deletions dpnp/backend/extensions/vm/div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,46 +64,16 @@ static sycl::event div_impl(sycl::queue exec_q,
{
type_utils::validate_type_for_device<T>(exec_q);

std::cerr << "enter div_impl" << std::endl;
const T* a = reinterpret_cast<const T*>(in_a);
const T* b = reinterpret_cast<const T*>(in_b);
T* y = reinterpret_cast<T*>(out_y);

const T* _a = reinterpret_cast<const T*>(in_a);
const T* _b = reinterpret_cast<const T*>(in_b);
T* _y = reinterpret_cast<T*>(out_y);

std::cerr << "casting is done" << std::endl;

T* a = sycl::malloc_device<T>(n, exec_q);
T* b = sycl::malloc_device<T>(n, exec_q);
T* y = sycl::malloc_device<T>(n, exec_q);

std::cerr << "malloc is done" << std::endl;

exec_q.copy(_a, a, n).wait();
exec_q.copy(_b, b, n).wait();
exec_q.copy(_y, y, n).wait();

std::cerr << "copy is done" << std::endl;

sycl::event ev = mkl_vm::div(exec_q,
return mkl_vm::div(exec_q,
n, // number of elements to be calculated
a, // pointer `a` containing 1st input vector of size n
b, // pointer `b` containing 2nd input vector of size n
y, // pointer `y` to the output vector of size n
depends);
ev.wait();

std::cerr << "div is done" << std::endl;

exec_q.copy(y, _y, n).wait();

std::cerr << "copy is done" << std::endl;

sycl::free(a, exec_q);
sycl::free(b, exec_q);
sycl::free(y, exec_q);

std::cerr << "leaving div_impl" << std::endl;
return sycl::event();
}

std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
Expand Down Expand Up @@ -205,20 +175,9 @@ std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
throw py::value_error("No div implementation defined");
}
sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
// sum_ev.wait();

// int* dummy = sycl::malloc_device<int>(1, exec_q);
// sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) {
// // cgh.depends_on(sum_ev);
// auto ctx = exec_q.get_context();
// cgh.host_task([dummy, ctx]() {
// // dummy host task to pass into keep_args_alive
// sycl::free(dummy, ctx);
// });
// });

// sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
// return std::make_pair(ht_ev, sum_ev);

sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
return std::make_pair(ht_ev, sum_ev);
return std::make_pair(sycl::event(), sycl::event());
}

Expand All @@ -227,6 +186,7 @@ bool can_call_div(sycl::queue exec_q,
dpctl::tensor::usm_ndarray src2,
dpctl::tensor::usm_ndarray dst)
{
#if INTEL_MKL_VERSION >= 20230002
// check type_nums
int src1_typenum = src1.get_typenum();
int src2_typenum = src2.get_typenum();
Expand Down Expand Up @@ -325,6 +285,16 @@ bool can_call_div(sycl::queue exec_q,
return false;
}
return true;
#else
// In oneMKL 2023.1.0 a call to oneapi::mkl::vm::div() deadlocks
// inside ~usm_wrapper_to_host()->{...; q_->wait_and_throw(); ...}

(void)exec_q;
(void)src1;
(void)src2;
(void)dst;
return false;
#endif // INTEL_MKL_VERSION >= 20230002
}

template <typename fnT, typename T>
Expand Down

0 comments on commit 1b4b671

Please sign in to comment.