Skip to content

Bolt 9bb68d #272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
9b3ef19
[BOLT] add BOLT repository
shintaro-iwasaki Oct 27, 2020
bbbd39b
[BOLT] support BOLT
shintaro-iwasaki Oct 27, 2020
3f39329
[BOLT] Merge commit 'a6f6c94b6ac996a3ca356d3ad2d4febb6be786ef' into b…
shintaro-iwasaki Oct 27, 2020
2cafe2c
Merge commit 'f2400f024d323bc9000a4c126f2008a8b58fb4a0' into bolt-a6f6c9
shintaro-iwasaki Oct 27, 2020
6e6fe40
Merge pull request #1 from shintaro-iwasaki/bolt-a6f6c9
shintaro-iwasaki Oct 28, 2020
258e0f7
[BOLT] Merge commit 'c3d5df7a7e33ed45011489f1c6aaf47fe9216274' into b…
shintaro-iwasaki Oct 28, 2020
76d428f
[BOLT/test] mark some "untied" tests as unsupported with clang
shintaro-iwasaki Oct 28, 2020
bfb5528
Merge commit '6aa7228a629d81af78d4f701b7defb701f4b9283' into bolt-c3d5df
shintaro-iwasaki Oct 28, 2020
1173d08
Merge pull request #2 from shintaro-iwasaki/bolt-c3d5df
shintaro-iwasaki Oct 28, 2020
29437d3
[BOLT] Merge commit '1eaffd515aa517071625d9d8eecd45e91515444e' into b…
shintaro-iwasaki Oct 28, 2020
a1ffa53
[BOLT/test] mark OpenMP 5.0 tests as unsupported
shintaro-iwasaki Oct 28, 2020
54ca9d1
Merge commit '23419bfd1c8f26617bda47e6d4732dcbfe0c09a3' into bolt-1eaffd
shintaro-iwasaki Oct 28, 2020
9334d9b
Merge pull request #3 from shintaro-iwasaki/bolt-1eaffd
shintaro-iwasaki Oct 28, 2020
e517158
[BOLT] Merge commit 'ddec6316d319181792db8593d1a081013933e501' into b…
shintaro-iwasaki Oct 28, 2020
c0aa918
Merge commit '90a9f97cbda3bef63d9866d300b73b8ccf65c7f5' into bolt-ddec63
shintaro-iwasaki Oct 28, 2020
c2121e4
Merge pull request #4 from shintaro-iwasaki/bolt-ddec63
shintaro-iwasaki Oct 28, 2020
554b44f
Merge commit '24d0ef0f503f8230f115df049ee0ccd067f0881b' into bolt-9bb68d
shintaro-iwasaki Nov 13, 2020
3f9626b
[BOLT] Merge commit 'ddec6316d319181792db8593d1a081013933e501' into b…
shintaro-iwasaki Nov 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bolt/libomptarget/plugins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ endif()
endmacro()

add_subdirectory(aarch64)
add_subdirectory(amdgpu)
add_subdirectory(cuda)
add_subdirectory(ppc64)
add_subdirectory(ppc64le)
Expand Down
89 changes: 89 additions & 0 deletions bolt/libomptarget/plugins/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
##===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.txt for details.
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for an AMDGPU machine if available.
#
##===----------------------------------------------------------------------===##

################################################################################

# Prerequisite checks. Each failed check reports why the plugin is skipped and
# returns from this directory's CMakeLists.txt without defining any target.

# As of rocm-3.7, hsa is installed with cmake packages and kmt is found via hsa.
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
# Test the variable by name: if() dereferences it itself, which stays
# well-formed even if the variable were ever empty or undefined.
if(NOT hsa-runtime64_FOUND)
  libomptarget_say("Not building HSA plugin: hsa-runtime64 not found")
  return()
endif()

if(NOT LIBOMPTARGET_DEP_LIBELF_FOUND)
  libomptarget_say("Not building AMDGPU plugin: LIBELF not found")
  return()
endif()

# The plugin is supported only when BOTH conditions hold: a supported host
# processor AND a Linux system. NOT binds tighter than AND in if(), so the
# conjunction must be parenthesized; without the parentheses the check never
# rejected non-Linux hosts.
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building amdgpu plugin: only support amdgpu in Linux x86_64, ppc64le, or aarch64 hosts.")
  return()
endif()
libomptarget_say("Building amdgpu offloading plugin")

################################################################################
# Directory-scoped compile definitions and include paths for the plugin.

# TARGET_NAME is the suffix for the runtime messaging dumps.
add_definitions(-DTARGET_NAME=AMDGPU)

# LITTLEENDIAN_CPU is only defined for ppc64le and aarch64 hosts.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
  add_definitions(-DLITTLEENDIAN_CPU=1)
endif()

# Debug builds additionally get the DEBUG macro.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  add_definitions(-DDEBUG)
endif()

# Headers of the ATMI implementation live under impl/.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/impl)

# The plugin itself: a shared library built from the ATMI implementation
# sources under impl/ plus the runtime-library entry points in src/rtl.cpp.
add_library(bolt-omptarget.rtl.amdgpu SHARED
  impl/atmi.cpp
  impl/atmi_interop_hsa.cpp
  impl/data.cpp
  impl/machine.cpp
  impl/system.cpp
  impl/utils.cpp
  impl/msgpack.cpp
  src/rtl.cpp
)

# Install plugin under the lib destination folder.
# When we build for debug, OPENMP_LIBDIR_SUFFIX gets set to -debug.
install(
  TARGETS bolt-omptarget.rtl.amdgpu
  LIBRARY DESTINATION "lib${OPENMP_LIBDIR_SUFFIX}"
)

# Install aliases: make the plugin reachable under the un-prefixed file name
# "omptarget.rtl.amdgpu" by symlinking it to the BOLT-prefixed library, both in
# the build tree and in the install tree.

# Build tree: only possible when the target has an explicit
# LIBRARY_OUTPUT_DIRECTORY (get_target_property yields a false "-NOTFOUND"
# value otherwise).
get_target_property(BOLT_LIBOMPTARGET_LIBRARY_DIR bolt-omptarget.rtl.amdgpu LIBRARY_OUTPUT_DIRECTORY)
if(BOLT_LIBOMPTARGET_LIBRARY_DIR)
  # The link target is a bare file name, so the symlink is relative and is
  # created inside WORKING_DIRECTORY. The directory is quoted and VERBATIM is
  # set so paths containing spaces or semicolons are passed through intact.
  add_custom_command(TARGET bolt-omptarget.rtl.amdgpu POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E create_symlink
            ${CMAKE_SHARED_LIBRARY_PREFIX}bolt-omptarget.rtl.amdgpu${CMAKE_SHARED_LIBRARY_SUFFIX}
            ${CMAKE_SHARED_LIBRARY_PREFIX}omptarget.rtl.amdgpu${CMAKE_SHARED_LIBRARY_SUFFIX}
    WORKING_DIRECTORY "${BOLT_LIBOMPTARGET_LIBRARY_DIR}"
    VERBATIM
  )
endif()

# Install tree: the escaped references (\${CMAKE_COMMAND}, \$ENV{DESTDIR},
# \${CMAKE_INSTALL_PREFIX}) are expanded when the install script runs, not at
# configure time. The working directory is quoted so a DESTDIR or prefix with
# spaces stays a single argument.
install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${CMAKE_SHARED_LIBRARY_PREFIX}bolt-omptarget.rtl.amdgpu${CMAKE_SHARED_LIBRARY_SUFFIX}\"
  \"${CMAKE_SHARED_LIBRARY_PREFIX}omptarget.rtl.amdgpu${CMAKE_SHARED_LIBRARY_SUFFIX}\" WORKING_DIRECTORY
  \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/${OPENMP_INSTALL_LIBDIR}\")")

# Let the installed plugin resolve its shared-library dependencies relative to
# its own location.
set_target_properties(bolt-omptarget.rtl.amdgpu PROPERTIES
  INSTALL_RPATH "$ORIGIN"
)

# Link against the HSA runtime and system libraries. The version script limits
# the exported symbols to those listed in ../exports, and "-z defs" makes the
# link fail on unresolved symbols.
target_link_libraries(bolt-omptarget.rtl.amdgpu
  PRIVATE
    hsa-runtime64::hsa-runtime64
    pthread dl elf
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
    "-Wl,-z,defs"
)

# Report to the parent scope that we are building a plugin for amdgpu.
set(LIBOMPTARGET_SYSTEM_TARGETS
  "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa"
  PARENT_SCOPE
)

45 changes: 45 additions & 0 deletions bolt/libomptarget/plugins/amdgpu/impl/atmi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*===--------------------------------------------------------------------------
* ATMI (Asynchronous Task and Memory Interface)
*
* This file is distributed under the MIT License. See LICENSE.txt for details.
*===------------------------------------------------------------------------*/
#include "rt.h"
/*
* Initialize/Finalize
*/
/* Initializes the runtime by delegating to core::Runtime::Initialize() and
 * returns the status it reports. */
atmi_status_t atmi_init() {
  const atmi_status_t status = core::Runtime::Initialize();
  return status;
}

/* Finalizes the runtime by delegating to core::Runtime::Finalize() and
 * returns the status it reports. */
atmi_status_t atmi_finalize() {
  const atmi_status_t status = core::Runtime::Finalize();
  return status;
}

/*
* Machine Info
*/
/* Returns the machine description pointer obtained from
 * core::Runtime::GetMachineInfo(). */
atmi_machine_t *atmi_machine_get_info() {
  atmi_machine_t *machine = core::Runtime::GetMachineInfo();
  return machine;
}

/*
* Modules
*/
/* Registers an in-memory module for the given place.
 *
 * All arguments are forwarded unchanged to RegisterModuleFromMemory() on the
 * core::Runtime instance: the module image (module_bytes, module_size), the
 * target place, and the on_deserialized_data callback together with its
 * opaque cb_state. The status returned by the runtime is passed back to the
 * caller. */
atmi_status_t atmi_module_register_from_memory_to_place(
    void *module_bytes, size_t module_size, atmi_place_t place,
    atmi_status_t (*on_deserialized_data)(void *data, size_t size,
                                          void *cb_state),
    void *cb_state) {
  const atmi_status_t status =
      core::Runtime::getInstance().RegisterModuleFromMemory(
          module_bytes, module_size, place, on_deserialized_data, cb_state);
  return status;
}

/*
* Data
*/
/* Copies size bytes from src to dest by delegating to core::Runtime::Memcpy(),
 * passing the HSA signal sig through unchanged; returns the runtime's status. */
atmi_status_t atmi_memcpy(hsa_signal_t sig, void *dest, const void *src,
                          size_t size) {
  const atmi_status_t status = core::Runtime::Memcpy(sig, dest, src, size);
  return status;
}

/* Releases ptr by delegating to core::Runtime::Memfree() and returns the
 * status it reports. */
atmi_status_t atmi_free(void *ptr) {
  const atmi_status_t status = core::Runtime::Memfree(ptr);
  return status;
}

/* Requests size bytes at the given memory place by delegating to
 * core::Runtime::Malloc(); ptr is forwarded as the out-parameter and the
 * runtime's status is returned. */
atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
  const atmi_status_t status = core::Runtime::Malloc(ptr, size, place);
  return status;
}
194 changes: 194 additions & 0 deletions bolt/libomptarget/plugins/amdgpu/impl/atmi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*===--------------------------------------------------------------------------
* ATMI (Asynchronous Task and Memory Interface)
*
* This file is distributed under the MIT License. See LICENSE.txt for details.
*===------------------------------------------------------------------------*/
#ifndef INCLUDE_ATMI_H_
#define INCLUDE_ATMI_H_

#define ROCM_VERSION_MAJOR 3
#define ROCM_VERSION_MINOR 2

/** \defgroup enumerations Enumerated Types
* @{
*/

/**
* @brief Status codes.
*
* Returned by the ATMI entry points declared with an atmi_status_t result.
* Note that the enum tag and the typedef share the name atmi_status_t, unlike
* the other enums in this header, which use an _s tag.
*/
typedef enum atmi_status_t {
/**
* The function has been executed successfully.
*/
ATMI_STATUS_SUCCESS = 0,
/**
* An undocumented error has occurred.
*/
ATMI_STATUS_UNKNOWN = 1,
/**
* A generic error has occurred.
*/
ATMI_STATUS_ERROR = 2,
} atmi_status_t;

/**
* @brief Device Types.
*
* The values are bit flags, so they can be OR-ed together:
* ATMI_DEVTYPE_GPU is iGPU | dGPU (0x0110) and ATMI_DEVTYPE_ALL (0x111) is
* CPU | iGPU | dGPU. ATMI_DEVTYPE_ALL is also used as the array bound of the
* per-device-type arrays in atmi_machine_t.
*/
typedef enum atmi_devtype_s {
ATMI_DEVTYPE_CPU = 0x0001,
ATMI_DEVTYPE_iGPU = 0x0010, // Integrated GPU
ATMI_DEVTYPE_dGPU = 0x0100, // Discrete GPU
ATMI_DEVTYPE_GPU = ATMI_DEVTYPE_iGPU | ATMI_DEVTYPE_dGPU, // Any GPU
ATMI_DEVTYPE_ALL = 0x111 // Union of all device types
} atmi_devtype_t;

/**
* @brief Memory Access Type.
*
* ATMI_MEMTYPE_ANY has no explicit initializer and therefore takes the next
* value after ATMI_MEMTYPE_COARSE_GRAINED, i.e. 2.
*/
typedef enum atmi_memtype_s {
ATMI_MEMTYPE_FINE_GRAINED = 0,
ATMI_MEMTYPE_COARSE_GRAINED = 1,
ATMI_MEMTYPE_ANY
} atmi_memtype_t;

/**
* @brief ATMI Memory Fences for Tasks.
*
* Selects how widely a memory fence is applied, from no fence at all (NONE)
* through the device (DEVICE) to the entire system (SYSTEM).
*/
typedef enum atmi_task_fence_scope_s {
/**
* No memory fence applied; external fences have to be applied around the task
* launch/completion.
*/
ATMI_FENCE_SCOPE_NONE = 0,
/**
* The fence is applied to the device.
*/
ATMI_FENCE_SCOPE_DEVICE = 1,
/**
* The fence is applied to the entire system.
*/
ATMI_FENCE_SCOPE_SYSTEM = 2
} atmi_task_fence_scope_t;

/** @} */

/** \defgroup common Common ATMI Structures
* @{
*/

/**
* @brief ATMI Compute Place
*
* Identifies where computation should run: a cluster node, a device type and
* a device ordinal within that type.
*/
typedef struct atmi_place_s {
/**
* The node in a cluster where computation should occur.
* Default is node_id = 0 for local computations.
*/
unsigned int node_id;
/**
* Device type. NOTE(review): earlier wording mentioned "CPU, GPU or DSP", but
* atmi_devtype_t in this header only defines CPU, iGPU and dGPU values.
*/
atmi_devtype_t type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int device_id;
} atmi_place_t;

/**
* @brief ATMI Memory Place
*
* Identifies a memory space/region: a cluster node, a device (type and
* ordinal) and a memory ordinal on that device.
*/
typedef struct atmi_mem_place_s {
/**
* The node in a cluster. Default is node_id = 0 for local use.
* NOTE(review): the original wording ("where computation should occur") was
* shared with atmi_place_t; presumably this is the node owning the memory --
* confirm.
*/
unsigned int node_id;
/**
* Device type. NOTE(review): earlier wording mentioned "CPU, GPU or DSP", but
* atmi_devtype_t in this header only defines CPU, iGPU and dGPU values.
*/
atmi_devtype_t dev_type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int dev_id;
// Disabled field, kept for reference:
// atmi_memtype_t mem_type; // Fine grained or Coarse grained
/**
* The memory space/region ordinal number ordered by runtime; -1 for any
*/
int mem_id;
} atmi_mem_place_t;

/**
* @brief ATMI Memory Space/region Structure
*
* Describes one memory space/region by its capacity and access type.
*/
typedef struct atmi_memory_s {
/**
* Memory capacity (unit not stated here; presumably bytes -- TODO confirm)
*/
unsigned long int capacity;
/**
* Memory type (fine grained, coarse grained or any; see atmi_memtype_t)
*/
atmi_memtype_t type;
} atmi_memory_t;

/**
* @brief ATMI Device Structure
*
* Describes one device by its type and the memory spaces/regions it can
* access.
*/
typedef struct atmi_device_s {
/**
* Device type. NOTE(review): earlier wording mentioned "CPU, GPU or DSP", but
* atmi_devtype_t in this header only defines CPU, iGPU and dGPU values.
*/
atmi_devtype_t type;
/**
* Array of memory spaces/regions that are accessible
* from this device. The element count is not stored in this structure.
*/
atmi_memory_t *memories;
} atmi_device_t;

/**
* @brief ATMI Machine Structure
*
* Both arrays have ATMI_DEVTYPE_ALL (0x111, i.e. 273) slots. Presumably they
* are indexed by the atmi_devtype_t value of the device type, which would
* leave most slots unused -- TODO confirm against the code that fills them.
*/
typedef struct atmi_machine_s {
/**
* The number of devices categorized by the device type
*/
unsigned int device_count_by_type[ATMI_DEVTYPE_ALL];
/**
* The device structures categorized by the device type
*/
atmi_device_t *devices_by_type[ATMI_DEVTYPE_ALL];
} atmi_machine_t;

// Below are some helper macros that can be used to setup
// some of the ATMI data structures.
//
// Each macro expands to a brace-enclosed initializer using designated
// initializers, so it is meant to appear on the right-hand side of a
// declaration, e.g. `atmi_place_t p = ATMI_PLACE_GPU(0, 0);`. The designators
// are listed in the same order as the members are declared.

// atmi_place_t for CPU number cpu_id on the given node.
#define ATMI_PLACE_CPU(node, cpu_id) \
{ .node_id = node, .type = ATMI_DEVTYPE_CPU, .device_id = cpu_id }
// atmi_place_t for GPU number gpu_id on the given node; the type is
// ATMI_DEVTYPE_GPU, i.e. integrated or discrete.
#define ATMI_PLACE_GPU(node, gpu_id) \
{ .node_id = node, .type = ATMI_DEVTYPE_GPU, .device_id = gpu_id }
// atmi_mem_place_t for any memory region (mem_id = -1) of a CPU.
#define ATMI_MEM_PLACE_CPU(node, cpu_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \
.mem_id = -1 \
}
// atmi_mem_place_t for any memory region (mem_id = -1) of a GPU.
#define ATMI_MEM_PLACE_GPU(node, gpu_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id, \
.mem_id = -1 \
}
// atmi_mem_place_t for a specific memory region of a CPU.
#define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \
.mem_id = cpu_mem_id \
}
// atmi_mem_place_t for a specific memory region of a GPU.
#define ATMI_MEM_PLACE_GPU_MEM(node, gpu_id, gpu_mem_id) \
{ \
.node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id, \
.mem_id = gpu_mem_id \
}
// Generic atmi_mem_place_t on the local node (node_id is fixed to 0) with an
// explicit device type, device ordinal and memory ordinal.
#define ATMI_MEM_PLACE(d_type, d_id, m_id) \
{ .node_id = 0, .dev_type = d_type, .dev_id = d_id, .mem_id = m_id }

#endif // INCLUDE_ATMI_H_
Loading