Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ if(ENABLE_MKLCPU_BACKEND
OR ENABLE_CURAND_BACKEND)
list(APPEND DOMAINS_LIST "rng")
endif()
# Register the "dft" domain as soon as one of the oneMKL backends
# (MKLCPU or MKLGPU) is enabled.
if(ENABLE_MKLCPU_BACKEND OR ENABLE_MKLGPU_BACKEND)
  list(APPEND DOMAINS_LIST "dft")
endif()

# Define required CXX compilers before project
if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
Expand Down
31 changes: 31 additions & 0 deletions examples/dft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# Note: the compile-time dispatching example targets the MKLCPU backend;
# the CMakeLists.txt in the sub-directory decides whether it is built.
add_subdirectory(compile_time_dispatching)

# The run-time dispatching example is currently disabled.
# runtime compilation is only possible with dynamic libraries
# if (BUILD_SHARED_LIBS)
# add_subdirectory(run_time_dispatching)
# endif()
49 changes: 49 additions & 0 deletions examples/dft/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# Build one executable (and register one ctest) per compile-time dispatching
# example source. `domain` is expected to be set by the including scope; it is
# used to form target names, test names and the per-domain library list.
set(DFTI_CT_SOURCES "")
if(ENABLE_MKLCPU_BACKEND)
  list(APPEND DFTI_CT_SOURCES "complex_fwd_usm_mklcpu")
endif()

# Link settings that only apply to dft examples built against the MKLCPU backend.
set(dfti_ct_use_mklcpu FALSE)
if("${domain}" STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND)
  set(dfti_ct_use_mklcpu TRUE)

  find_library(OPENCL_LIBRARY NAMES OpenCL)
  if(NOT OPENCL_LIBRARY)
    # The examples link ${OPENCL_LIBRARY} unconditionally; stop here with a
    # clear message instead of reporting "Found OpenCL: ...-NOTFOUND" and
    # failing later when the NOTFOUND value is used for linking.
    message(FATAL_ERROR "OpenCL library not found; it is required to link the ${domain} MKLCPU examples")
  endif()
  message(STATUS "Found OpenCL: ${OPENCL_LIBRARY}")

  # Appended once, outside the loop, so the backend library is not repeated
  # in the list for every example source.
  list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu)
endif()

foreach(dfti_ct_source IN LISTS DFTI_CT_SOURCES)
  set(dfti_ct_target example_${domain}_${dfti_ct_source})

  add_executable(${dfti_ct_target} ${dfti_ct_source}.cpp)
  target_include_directories(${dfti_ct_target}
    PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
    PUBLIC ${PROJECT_SOURCE_DIR}/include
    PUBLIC ${CMAKE_BINARY_DIR}/bin
  )

  if(dfti_ct_use_mklcpu)
    add_dependencies(${dfti_ct_target} onemkl_${domain}_mklcpu)
    target_link_libraries(${dfti_ct_target} PUBLIC ${OPENCL_LIBRARY})
  endif()

  target_link_libraries(${dfti_ct_target} PUBLIC
    ${ONEMKL_LIBRARIES_${domain}}
    ONEMKL::SYCL::SYCL
  )

  # Register example as ctest
  add_test(NAME ${domain}/EXAMPLE/CT/${dfti_ct_source} COMMAND ${dfti_ct_target})
endforeach()
155 changes: 155 additions & 0 deletions examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*******************************************************************************
* Copyright 2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
*
* SPDX-License-Identifier: Apache-2.0
*******************************************************************************/

/*
*
* Content:
* This example demonstrates how to create and configure
* oneapi::mkl::dft::descriptor objects for a complex DFT with
* USM data on a cpu device, using the MKLCPU backend.
*
* This example demonstrates only double precision (double) data type
* for complex data
*
*******************************************************************************/

// STL includes
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <vector>

// oneMKL/SYCL includes
#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
#else
#include <CL/sycl.hpp>
#endif
#include "oneapi/mkl.hpp"

// local includes
#include "example_helper.hpp"

// Creates a SYCL queue on `cpu_device` and configures double precision complex
// DFT descriptors (a 1D descriptor of length N and a 2D descriptor of N x N).
// The commit and compute calls are still commented out, so no transform is
// actually executed; the USM buffer is allocated and released again.
//
// cpu_device: device the queue (and the USM allocation) is created for.
//
// Asynchronous SYCL exceptions reported to the queue are printed and the
// process exits with status 2.
void run_getrs_example(const sycl::device& cpu_device) {
    namespace dft = oneapi::mkl::dft;
    using descriptor_t = dft::descriptor<dft::precision::DOUBLE, dft::domain::COMPLEX>;

    // Transform length. Declared as a 64-bit signed integer so every integer
    // configuration value below is passed with one explicit, consistent type.
    constexpr std::int64_t N = 10;
    // Strides handed to INPUT_STRIDES of the 2D descriptor.
    std::int64_t rs[3]{ 0, N, 1 };

    // Catch asynchronous exceptions for cpu
    auto cpu_error_handler = [&](sycl::exception_list exceptions) {
        for (auto const& e : exceptions) {
            try {
                std::rethrow_exception(e);
            }
            catch (sycl::exception const& e) {
                // Handle not dft related exceptions that happened during asynchronous call
                std::cerr << "Caught asynchronous SYCL exception on cpu device during DFT example:"
                          << std::endl;
                std::cerr << "\t" << e.what() << std::endl;
            }
        }
        std::exit(2);
    };

    std::cout << "DFTI example" << std::endl;
    //
    // Preparation on cpu
    //
    sycl::queue cpu_queue(cpu_device, cpu_error_handler);

    // enabling
    descriptor_t desc(N);
    descriptor_t desc_vector({ N, N });
    desc.set_value(dft::config_param::BACKWARD_SCALE, 1.0 / static_cast<double>(N));
    desc.set_value(dft::config_param::NUMBER_OF_TRANSFORMS, std::int64_t{ 4 });
    desc_vector.set_value(dft::config_param::INPUT_STRIDES, rs);
    desc.set_value(dft::config_param::FWD_DISTANCE, N);
    desc.set_value(dft::config_param::BWD_DISTANCE, N);
    desc.set_value(dft::config_param::PLACEMENT, dft::config_value::NOT_INPLACE);

    // Room for N complex values stored as 2 * N doubles. Allocated only after
    // the descriptors are configured so that nothing between the allocation
    // and the matching sycl::free below can throw and leak the buffer.
    double* x_usm = sycl::malloc_shared<double>(static_cast<std::size_t>(2 * N), cpu_queue);

    // [compile time] desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue });
    // [run time] desc.commit(cpu_queue);
    // oneapi::mkl::dft::compute_forward(desc, x_usm);

    // Release the USM allocation (the original code never freed it).
    sycl::free(x_usm, cpu_queue);
}

//
// Description of example setup, apis used and supported floating point type precisions
//

// Prints a banner to stdout that summarizes what this example exercises:
// the DFT descriptor API, the double precision complex data type, and the
// cpu device used through the MKLCPU backend.
void print_example_banner() {
    const char* const separator =
        "########################################################################";

    std::cout << "" << std::endl;
    std::cout << separator << std::endl;
    std::cout << "# DFT complex forward transform USM example: " << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Using APIs:" << std::endl;
    std::cout << "#   oneapi::mkl::dft::descriptor" << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Using double precision (double) data type" << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Running on a cpu device with the MKLCPU backend" << std::endl;
    std::cout << "# " << std::endl;
    std::cout << separator << std::endl;
    std::cout << std::endl;
}

//
// Main entry point for example.
//
// Prints the banner, selects a cpu device and runs the example on it.
// Returns 0 on success and 1 when a synchronous SYCL or standard exception
// is caught. Command line arguments are not used.
int main(int /*argc*/, char** /*argv*/) {
    print_example_banner();

    try {
        sycl::device cpu_dev((sycl::cpu_selector()));
        std::cout << "Running DFT Complex forward USM example" << std::endl;
        std::cout << "Running with double precision complex data type on:" << std::endl;
        std::cout << "\tcpu device :" << cpu_dev.get_info<sycl::info::device::name>() << std::endl;

        run_getrs_example(cpu_dev);
        std::cout << "DFT Complex USM example ran OK on MKLcpu" << std::endl;
    }
    catch (sycl::exception const& e) {
        // Handle not dft related exceptions that happened during synchronous call
        std::cerr << "Caught synchronous SYCL exception:" << std::endl;
        std::cerr << "\t" << e.what() << std::endl;
        std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
        return 1;
    }
    catch (std::exception const& e) {
        // Handle not SYCL related exceptions that happened during synchronous call
        std::cerr << "Caught synchronous std::exception:" << std::endl;
        std::cerr << "\t" << e.what() << std::endl;
        return 1;
    }

    return 0;
}
67 changes: 67 additions & 0 deletions examples/dft/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# NOTE: user needs to set env var SYCL_DEVICE_FILTER to use runtime example (no need to specify backend when building with CMake)

# Build one executable per run-time dispatching example source and register
# one ctest per (example, device filter) pair. `domain` is expected to be set
# by the including scope; it is used in target and test names.
set(DFT_RT_SOURCES "complex_fwd_usm")

# Set up for the right backend for run-time dispatching examples
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend
set(DEVICE_FILTERS "")
if(ENABLE_MKLCPU_BACKEND)
  list(APPEND DEVICE_FILTERS "cpu")
endif()
# The gpu filter is only registered on UNIX hosts.
# NOTE(review): the original comment referred to RNG ("only supports mklcpu
# backend on Windows"); confirm that the same restriction applies to DFT.
if(UNIX AND ENABLE_MKLGPU_BACKEND)
  list(APPEND DEVICE_FILTERS "gpu")
endif()

message(STATUS "SYCL_DEVICE_FILTER will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples")

foreach(dft_rt_source IN LISTS DFT_RT_SOURCES)
  set(dft_rt_target example_${domain}_${dft_rt_source})

  add_executable(${dft_rt_target} ${dft_rt_source}.cpp)
  target_include_directories(${dft_rt_target}
    PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
    PUBLIC ${PROJECT_SOURCE_DIR}/include
    PUBLIC ${CMAKE_BINARY_DIR}/bin
  )

  add_dependencies(${dft_rt_target} onemkl)

  if(USE_ADD_SYCL_TO_TARGET_INTEGRATION)
    # Pass the source file of this target only; the list variable holds bare
    # names without the .cpp extension and covers every example.
    add_sycl_to_target(TARGET ${dft_rt_target} SOURCES ${dft_rt_source}.cpp)
  endif()

  target_link_libraries(${dft_rt_target} PUBLIC
    onemkl
    ONEMKL::SYCL::SYCL
    ${CMAKE_DL_LIBS}
  )

  # Register example as ctest
  foreach(device_filter IN LISTS DEVICE_FILTERS)
    set(dft_rt_test ${domain}/EXAMPLE/RT/${dft_rt_source}/${device_filter})
    add_test(NAME ${dft_rt_test} COMMAND ${dft_rt_target})
    # Everything after PROPERTY <name> is taken as a value of that property,
    # so ENVIRONMENT is named once and both assignments follow it. Repeating
    # the keyword would add the literal word "ENVIRONMENT" to the list.
    set_property(TEST ${dft_rt_test} PROPERTY
      ENVIRONMENT
        "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH}"
        "SYCL_DEVICE_FILTER=${device_filter}"
    )
  endforeach()

endforeach()
Loading