
[SYCL][CUDA] Windows and MSVC support for CUDA backend #4345

Merged 9 commits on Sep 13, 2021
48 changes: 34 additions & 14 deletions clang/lib/Driver/ToolChains/Cuda.cpp
@@ -125,7 +125,9 @@ CudaInstallationDetector::CudaInstallationDetector(
SmallVector<Candidate, 4> Candidates;

// In decreasing order so we prefer newer versions to older versions.
std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
std::initializer_list<const char *> Versions = {
"11.4", "11.3", "11.2", "11.1", "10.2", "10.1", "10.0",
"9.2", "9.1", "9.0", "8.0", "7.5", "7.0"};
auto &FS = D.getVFS();

if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
@@ -187,18 +189,29 @@ CudaInstallationDetector::CudaInstallationDetector(
if (CheckLibDevice && !FS.exists(LibDevicePath))
continue;

// On Linux, we have both lib and lib64 directories, and we need to choose
// based on our triple. On MacOS, we have only a lib directory.
//
// It's sufficient for our purposes to be flexible: If both lib and lib64
// exist, we choose whichever one matches our triple. Otherwise, if only
// lib exists, we use it.
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
LibPath = InstallPath + "/lib64";
else if (FS.exists(InstallPath + "/lib"))
LibPath = InstallPath + "/lib";
else
continue;
if (HostTriple.isOSWindows()) {
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib/x64"))
LibPath = InstallPath + "/lib/x64";
else if (FS.exists(InstallPath + "/lib/Win32"))
LibPath = InstallPath + "/lib/Win32";
else if (FS.exists(InstallPath + "/lib"))
LibPath = InstallPath + "/lib";
else
continue;
} else {
// On Linux, we have both lib and lib64 directories, and we need to choose
// based on our triple. On MacOS, we have only a lib directory.
//
// It's sufficient for our purposes to be flexible: If both lib and lib64
// exist, we choose whichever one matches our triple. Otherwise, if only
// lib exists, we use it.
if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
LibPath = InstallPath + "/lib64";
else if (FS.exists(InstallPath + "/lib"))
LibPath = InstallPath + "/lib";
else
continue;
}

Version = CudaVersion::UNKNOWN;
if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
@@ -703,12 +716,19 @@ void CudaToolChain::addClangTargetOptions(
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
LibraryPaths.emplace_back(WithInstallPath.c_str());

// Select remangled libclc variant. 64-bit longs default, 32-bit longs on
// Windows
std::string LibSpirvTargetName =
"remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc";
if (HostTC.getTriple().isOSWindows())
LibSpirvTargetName =
"remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc";

for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibSpirvTargetFile(LibraryPath);
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
if (llvm::sys::fs::exists(LibSpirvTargetFile)) {
if (llvm::sys::fs::exists(LibSpirvTargetFile) ||
DriverArgs.hasArg(options::OPT__HASH_HASH_HASH)) {
LibSpirvFile = std::string(LibSpirvTargetFile.str());
break;
}
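The Windows branch above mirrors the CUDA toolkit's directory layout, where the driver import library lives under `lib/x64` (64-bit) or `lib/Win32` (32-bit) rather than `lib64`. A minimal standalone sketch of the same selection order, using `std::filesystem` instead of clang's VFS abstraction (illustrative only, not the driver code):

```cpp
// Illustrative sketch of the lib-path selection order used in the diff above.
// Assumes plain std::filesystem checks rather than clang's VFS.
#include <filesystem>
#include <optional>
#include <string>

std::optional<std::string> pickCudaLibPath(const std::string &InstallPath,
                                           bool IsWindows, bool Is64Bit) {
  namespace fs = std::filesystem;
  if (IsWindows) {
    if (Is64Bit && fs::exists(InstallPath + "/lib/x64"))
      return InstallPath + "/lib/x64";
    if (fs::exists(InstallPath + "/lib/Win32"))
      return InstallPath + "/lib/Win32";
    if (fs::exists(InstallPath + "/lib"))
      return InstallPath + "/lib";
    return std::nullopt; // candidate rejected, try the next installation
  }
  // Non-Windows: prefer lib64 on 64-bit hosts, otherwise fall back to lib.
  if (Is64Bit && fs::exists(InstallPath + "/lib64"))
    return InstallPath + "/lib64";
  if (fs::exists(InstallPath + "/lib"))
    return InstallPath + "/lib";
  return std::nullopt;
}
```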
7 changes: 7 additions & 0 deletions clang/test/Driver/cuda-nvptx-target.cpp
@@ -0,0 +1,7 @@
// RUN: %clang -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -nocudalib -target x86_64-unknown-windows-msvc %s 2> %t.win.out
// RUN: FileCheck %s --check-prefixes=CHECK-WINDOWS --input-file %t.win.out
// CHECK-WINDOWS: remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc
//
// RUN: %clang -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -nocudalib -target x86_64-unknown-linux-gnu %s 2> %t.lnx.out
// RUN: FileCheck %s --check-prefixes=CHECK-LINUX --input-file %t.lnx.out
// CHECK-LINUX: remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc
40 changes: 20 additions & 20 deletions libclc/cmake/modules/HandleInLLVMTree.cmake
@@ -1,25 +1,25 @@
macro(configure_in_llvm_tree)
set(LLVM_CLANG ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
set(LLVM_AS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
set(LLVM_LINK ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
set(LLVM_OPT ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
set(LIBCLC_REMANGLER ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler)
set(LLVM_CLANG ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
set(LLVM_AS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
set(LLVM_LINK ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
set(LLVM_OPT ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
set(LIBCLC_REMANGLER ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})

if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as)
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link)
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt)
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler)
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler)
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX} "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX})
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX} "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-as${CMAKE_EXECUTABLE_SUFFIX})
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX} "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-link${CMAKE_EXECUTABLE_SUFFIX})
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX} "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/opt${CMAKE_EXECUTABLE_SUFFIX})
if (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})
file(WRITE ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX} "" )
endif (NOT EXISTS ${LLVM_RUNTIME_OUTPUT_INTDIR}/libclc-remangler${CMAKE_EXECUTABLE_SUFFIX})

# Assume all works well
# We can't test the compilers as they haven't been built yet
12 changes: 12 additions & 0 deletions libclc/utils/prepare-builtins.cpp
@@ -75,6 +75,18 @@ int main(int argc, char **argv) {
if (NamedMDNode *OCLVersion = M->getNamedMetadata("opencl.ocl.version"))
M->eraseNamedMetadata(OCLVersion);

// wchar_size flag can cause a mismatch between libclc libraries and
// modules using them. Since wchar is not used by libclc we drop the flag
if (M->getModuleFlag("wchar_size")) {
SmallVector<Module::ModuleFlagEntry, 4> ModuleFlags;
M->getModuleFlagsMetadata(ModuleFlags);
M->getModuleFlagsMetadata()->clearOperands();
for (const Module::ModuleFlagEntry ModuleFlag : ModuleFlags)
if (ModuleFlag.Key->getString() != "wchar_size")
M->addModuleFlag(ModuleFlag.Behavior, ModuleFlag.Key->getString(),
ModuleFlag.Val);
}

// Set linkage of every external definition to linkonce_odr.
for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
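The effect of the flag stripping above can be checked independently by loading a produced bitcode file and confirming that the `wchar_size` module flag is gone. A hedged sketch using the LLVM C++ API (the standalone checker itself is hypothetical, not part of this PR):

```cpp
// Hypothetical standalone check that a libclc bitcode file carries no
// wchar_size module flag after prepare-builtins has processed it.
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include <cstdio>
#include <memory>

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  std::unique_ptr<llvm::Module> M = llvm::parseIRFile(argv[1], Err, Ctx);
  if (!M)
    return 1;
  // getModuleFlag returns null when the flag is absent, which is what we
  // expect for libclc builtins after the stripping step above.
  bool HasWcharSize = M->getModuleFlag("wchar_size") != nullptr;
  std::printf("wchar_size present: %s\n", HasWcharSize ? "yes" : "no");
  return HasWcharSize ? 1 : 0;
}
```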
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/detail/pi.hpp
@@ -14,13 +14,13 @@
#pragma once

#include <CL/sycl/backend_types.hpp>
#include <CL/sycl/detail/common.hpp>
#include <CL/sycl/detail/export.hpp>
#include <CL/sycl/detail/os_util.hpp>
#include <CL/sycl/detail/pi.h>

#include <cassert>
#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
36 changes: 31 additions & 5 deletions sycl/plugins/cuda/CMakeLists.txt
@@ -9,11 +9,19 @@ find_package(CUDA 10.1 REQUIRED)
# Make imported library global to use it within the project.
add_library(cudadrv SHARED IMPORTED GLOBAL)

set_target_properties(
cudadrv PROPERTIES
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
)
if (WIN32)
set_target_properties(
cudadrv PROPERTIES
IMPORTED_IMPLIB ${CUDA_CUDA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
)
else()
set_target_properties(
cudadrv PROPERTIES
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
)
endif()

add_library(pi_cuda SHARED
"${sycl_inc_dir}/CL/sycl/detail/pi.h"
@@ -37,6 +45,24 @@ target_link_libraries(pi_cuda
cudadrv
)

if (MSVC)
# by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport)
# which are individually tagged for all pi* symbols in pi.h
target_compile_definitions(pi_cuda PRIVATE __SYCL_BUILD_SYCL_DLL)
else()
# we set the visibility of all symbols 'hidden' by default.
# In pi.h file, we set exported symbols with visibility==default individually
target_compile_options(pi_cuda PUBLIC -fvisibility=hidden)

# This script file is used to allow exporting pi* symbols only.
# All other symbols are regarded as local (hidden)
set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt")

# Filter symbols based on the scope defined in the script file,
# and export pi* function symbols in the library.
target_link_libraries(pi_cuda PRIVATE "-Wl,--version-script=${linker_script}")
endif()

add_common_options(pi_cuda)

install(TARGETS pi_cuda
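On MSVC the plugin exports its `pi*` entry points with `__declspec(dllexport)` (enabled by defining `__SYCL_BUILD_SYCL_DLL`), while the ELF build hides all symbols by default and whitelists `pi*` through the linker version script. A rough sketch of the usual export-macro pattern this relies on (macro and function names are illustrative; the real macro lives in the SYCL headers):

```cpp
// Illustrative export-macro pattern, not the actual SYCL definitions.
#if defined(_WIN32)
#  ifdef __SYCL_BUILD_SYCL_DLL            // set by the build system above
#    define PI_API_EXPORT __declspec(dllexport)
#  else
#    define PI_API_EXPORT __declspec(dllimport)
#  endif
#else
// With -fvisibility=hidden, only symbols tagged 'default' stay visible,
// and the version script further restricts the exported set to pi*.
#  define PI_API_EXPORT __attribute__((visibility("default")))
#endif

extern "C" PI_API_EXPORT int piExampleEntryPoint(); // hypothetical pi* symbol
```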
12 changes: 8 additions & 4 deletions sycl/plugins/cuda/pi_cuda.cpp
@@ -686,10 +686,10 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
static pi_uint32 numPlatforms = 1;
static _pi_platform platformId;

if (num_entries == 0 and platforms != nullptr) {
if (num_entries == 0 && platforms != nullptr) {
return PI_INVALID_VALUE;
}
if (platforms == nullptr and num_platforms == nullptr) {
if (platforms == nullptr && num_platforms == nullptr) {
return PI_INVALID_VALUE;
}

@@ -4509,7 +4509,7 @@ pi_result cuda_piextUSMFree(pi_context context, void *ptr) {
CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
result = PI_CHECK_ERROR(cuPointerGetAttributes(
2, attributes, attribute_values, (CUdeviceptr)ptr));
assert(type == CU_MEMORYTYPE_DEVICE or type == CU_MEMORYTYPE_HOST);
assert(type == CU_MEMORYTYPE_DEVICE || type == CU_MEMORYTYPE_HOST);
if (is_managed || type == CU_MEMORYTYPE_DEVICE) {
// Memory allocated with cuMemAlloc and cuMemAllocManaged must be freed
// with cuMemFree
@@ -4713,7 +4713,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
}
result = PI_CHECK_ERROR(cuPointerGetAttribute(
&value, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr));
assert(value == CU_MEMORYTYPE_DEVICE or value == CU_MEMORYTYPE_HOST);
assert(value == CU_MEMORYTYPE_DEVICE || value == CU_MEMORYTYPE_HOST);
if (value == CU_MEMORYTYPE_DEVICE) {
// pointer to device memory
return getInfo(param_value_size, param_value, param_value_size_ret,
@@ -4725,7 +4725,11 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
PI_MEM_TYPE_HOST);
}
// should never get here
#ifdef _MSC_VER
__assume(0);
#else
__builtin_unreachable();
#endif
return getInfo(param_value_size, param_value, param_value_size_ret,
PI_MEM_TYPE_UNKNOWN);
}
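`__builtin_unreachable()` is a GCC/Clang builtin that MSVC does not provide; `__assume(0)` is the MSVC equivalent hint, which is why the change above guards on `_MSC_VER`. Projects often wrap the pair in a single macro so call sites stay portable; a brief sketch (the macro name is made up for illustration):

```cpp
// Hypothetical portable wrapper around the compiler-specific hints used above.
#ifdef _MSC_VER
#  define PI_UNREACHABLE() __assume(0)
#else
#  define PI_UNREACHABLE() __builtin_unreachable()
#endif

int classify(int v) {
  switch (v) {
  case 0:
    return 10;
  case 1:
    return 20;
  default:
    PI_UNREACHABLE(); // tells the optimizer this path cannot be taken
  }
}
```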
8 changes: 5 additions & 3 deletions sycl/unittests/pi/TestGetPlugin.hpp
@@ -8,9 +8,11 @@
#include <algorithm>
#include <detail/plugin.hpp>
#include <functional>
#include <optional>

namespace pi {
inline cl::sycl::detail::plugin *initializeAndGet(cl::sycl::backend backend) {
inline std::optional<cl::sycl::detail::plugin>
initializeAndGet(cl::sycl::backend backend) {
auto plugins = cl::sycl::detail::pi::initialize();
auto it = std::find_if(plugins.begin(), plugins.end(),
[=](cl::sycl::detail::plugin p) -> bool {
@@ -20,9 +22,9 @@ inline cl::sycl::detail::plugin *initializeAndGet(cl::sycl::backend backend) {
std::string msg = GetBackendString(backend);
msg += " PI plugin not found!";
std::cerr << "Warning: " << msg << " Tests using it will be skipped.\n";
return nullptr;
return std::nullopt;
}
return &*it;
return std::optional<cl::sycl::detail::plugin>(*it);
}

inline std::vector<cl::sycl::detail::plugin> initializeAndRemoveInvalid() {
4 changes: 2 additions & 2 deletions sycl/unittests/pi/cuda/test_base_objects.cpp
@@ -24,11 +24,11 @@ using namespace cl::sycl;

class CudaBaseObjectsTest : public ::testing::Test {
protected:
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);

void SetUp() override {
// skip the tests if the CUDA backend is not available
if (!plugin) {
if (!plugin.has_value()) {
GTEST_SKIP();
}
}
6 changes: 3 additions & 3 deletions sycl/unittests/pi/cuda/test_commands.cpp
@@ -21,7 +21,7 @@ using namespace cl::sycl;
struct CudaCommandsTest : public ::testing::Test {

protected:
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);

pi_platform platform_;
pi_device device_;
@@ -30,7 +30,7 @@ struct CudaCommandsTest : public ::testing::Test {

void SetUp() override {
// skip the tests if the CUDA backend is not available
if (!plugin) {
if (!plugin.has_value()) {
GTEST_SKIP();
}

@@ -65,7 +65,7 @@ struct CudaCommandsTest : public ::testing::Test {
}

void TearDown() override {
if (plugin) {
if (plugin.has_value()) {
plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
plugin->call<detail::PiApiKind::piContextRelease>(context_);
}
4 changes: 2 additions & 2 deletions sycl/unittests/pi/cuda/test_contexts.cpp
@@ -25,14 +25,14 @@ using namespace cl::sycl;
struct CudaContextsTest : public ::testing::Test {

protected:
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);

pi_platform platform_;
pi_device device_;

void SetUp() override {
// skip the tests if the CUDA backend is not available
if (!plugin) {
if (!plugin.has_value()) {
GTEST_SKIP();
}

6 changes: 3 additions & 3 deletions sycl/unittests/pi/cuda/test_device.cpp
@@ -21,15 +21,15 @@ using namespace cl::sycl;
struct CudaDeviceTests : public ::testing::Test {

protected:
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);

pi_platform platform_;
pi_device device_;
pi_context context_;

void SetUp() override {
// skip the tests if the CUDA backend is not available
if (!plugin) {
if (!plugin.has_value()) {
GTEST_SKIP();
}

@@ -56,7 +56,7 @@ struct CudaDeviceTests : public ::testing::Test {
}

void TearDown() override {
if (plugin) {
if (plugin.has_value()) {
plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
plugin->call<detail::PiApiKind::piContextRelease>(context_);
}
6 changes: 3 additions & 3 deletions sycl/unittests/pi/cuda/test_kernels.cpp
@@ -24,15 +24,15 @@ using namespace cl::sycl;
struct CudaKernelsTest : public ::testing::Test {

protected:
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
std::optional<detail::plugin> plugin = pi::initializeAndGet(backend::cuda);
pi_platform platform_;
pi_device device_;
pi_context context_;
pi_queue queue_;

void SetUp() override {
// skip the tests if the CUDA backend is not available
if (!plugin) {
if (!plugin.has_value()) {
GTEST_SKIP();
}

@@ -65,7 +65,7 @@ struct CudaKernelsTest : public ::testing::Test {
}

void TearDown() override {
if (plugin) {
if (plugin.has_value()) {
plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
plugin->call<detail::PiApiKind::piContextRelease>(context_);