From 15107afb11210ae264b050b2acdf34ad32b3b7bd Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Wed, 13 Nov 2024 14:52:07 -0800 Subject: [PATCH] rocr: Generalize driver discovery Generalize driver discovery and move driver-specific functionality to the concrete driver implementations. Currently, this process is tightly coupled to hsakmt, which is GPU- and OS-specific. Change-Id: Ie1c53fef407a71b5ec4c6eaf3a3ed00871184409 --- libhsakmt/include/hsakmt/hsakmttypes.h | 2 + .../core/driver/kfd/amd_kfd_driver.cpp | 110 +++++- .../core/driver/xdna/amd_xdna_driver.cpp | 43 ++- .../hsa-runtime/core/inc/amd_aie_aql_queue.h | 2 + .../core/inc/amd_available_drivers.h | 53 +++ runtime/hsa-runtime/core/inc/amd_kfd_driver.h | 19 +- .../hsa-runtime/core/inc/amd_xdna_driver.h | 9 +- runtime/hsa-runtime/core/inc/driver.h | 38 ++- runtime/hsa-runtime/core/inc/runtime.h | 7 +- .../core/runtime/amd_aie_aql_queue.cpp | 1 + .../hsa-runtime/core/runtime/amd_topology.cpp | 323 ++++++++---------- runtime/hsa-runtime/core/runtime/runtime.cpp | 10 +- 12 files changed, 397 insertions(+), 220 deletions(-) create mode 100644 runtime/hsa-runtime/core/inc/amd_available_drivers.h diff --git a/libhsakmt/include/hsakmt/hsakmttypes.h b/libhsakmt/include/hsakmt/hsakmttypes.h index 926bdaea1..4d1ce1520 100644 --- a/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/libhsakmt/include/hsakmt/hsakmttypes.h @@ -267,6 +267,8 @@ typedef struct _HsaNodeProperties // e.g a "discrete HSA GPU" HSAuint32 NumFComputeCores; // # of HSA throughtput (= GPU) FCompute cores ("SIMD") present in a node. // This value is 0 if no FCompute cores are present (e.g. pure "CPU node"). + HSAuint32 NumNeuralCores; // # of HSA neural processing units (= AIE) present in a + // node. This value is 0 if there are no NeuralCores. HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node. HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node. 
diff --git a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index ceda2d6aa..e1c41a2bf 100644 --- a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -42,34 +42,74 @@ #include "core/inc/amd_kfd_driver.h" -#include - #include #include +#include +#include + #include "hsakmt/hsakmt.h" -#include "core/inc/amd_cpu_agent.h" -#include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" -#include "core/inc/exceptions.h" #include "core/inc/runtime.h" +extern r_debug _amdgpu_r_debug; + namespace rocr { namespace AMD { KfdDriver::KfdDriver(std::string devnode_name) : core::Driver(core::DriverType::KFD, devnode_name) {} -hsa_status_t KfdDriver::Init() { return HSA_STATUS_SUCCESS; } +hsa_status_t KfdDriver::Init() { + HSAKMT_STATUS ret = + hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); + + if (ret != HSAKMT_STATUS_SUCCESS && ret != HSAKMT_STATUS_NOT_SUPPORTED) return HSA_STATUS_ERROR; + + uint32_t caps_mask = 0; + if (hsaKmtGetRuntimeCapabilities(&caps_mask) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + core::Runtime::runtime_singleton_->KfdVersion( + ret != HSAKMT_STATUS_NOT_SUPPORTED, + !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK)); + + if (hsaKmtGetVersion(&version_) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + if (version_.KernelInterfaceMajorVersion == kfd_version_major_min && + version_.KernelInterfaceMinorVersion < kfd_version_minor_min) + return HSA_STATUS_ERROR; // KFD interface is older than the minimum acceptable version. + + core::Runtime::runtime_singleton_->KfdVersion(version_); + + if (version_.KernelInterfaceMajorVersion == 1 && version_.KernelInterfaceMinorVersion == 0) + core::g_use_interrupt_wait = false; // Disable KFD event support when using open source KFD. + + bool xnack_mode = BindXnackMode(); + core::Runtime::runtime_singleton_->XnackEnabled(xnack_mode); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::ShutDown() { + HSAKMT_STATUS 
ret = hsaKmtRuntimeDisable(); + if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + ret = hsaKmtReleaseSystemProperties(); + + if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return Close(); +} -hsa_status_t KfdDriver::DiscoverDriver() { - std::unique_ptr kfd_drv(new KfdDriver("/dev/kfd")); +hsa_status_t KfdDriver::DiscoverDriver(std::unique_ptr& driver) { + auto tmp_driver = std::unique_ptr(new KfdDriver("/dev/kfd")); - if (kfd_drv->Open() == HSA_STATUS_SUCCESS) { - core::Runtime::runtime_singleton_->RegisterDriver(kfd_drv); + if (tmp_driver->Open() == HSA_STATUS_SUCCESS) { + driver = std::move(tmp_driver); return HSA_STATUS_SUCCESS; } + return HSA_STATUS_ERROR; } @@ -87,6 +127,28 @@ hsa_status_t KfdDriver::Close() { : HSA_STATUS_ERROR; } +hsa_status_t KfdDriver::GetSystemProperties(HsaSystemProperties& sys_props) const { + if (hsaKmtReleaseSystemProperties() != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + if (hsaKmtAcquireSystemProperties(&sys_props) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const { + if (hsaKmtGetNodeProperties(node_id, &node_props) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const { + if (hsaKmtGetNodeIoLinkProperties(node_id, io_link_props.size(), io_link_props.data()) != + HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + return HSA_STATUS_SUCCESS; +} + hsa_status_t KfdDriver::GetAgentProperties(core::Agent &agent) const { return HSA_STATUS_SUCCESS; } @@ -300,5 +362,33 @@ void KfdDriver::MakeKfdMemoryUnresident(const void *mem) { hsaKmtUnmapMemoryToGPU(const_cast(mem)); } +bool KfdDriver::BindXnackMode() { + // Get users' preference for Xnack mode of ROCm platform. 
+ HSAint32 mode = core::Runtime::runtime_singleton_->flag().xnack(); + bool config_xnack = (mode != Flag::XNACK_REQUEST::XNACK_UNCHANGED); + + // Indicate to driver users' preference for Xnack mode + // Call to driver can fail and is a supported feature + HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; + if (config_xnack) { + status = hsaKmtSetXNACKMode(mode); + if (status == HSAKMT_STATUS_SUCCESS) { + return (mode != Flag::XNACK_DISABLE); + } + } + + // Get Xnack mode of devices bound by driver. This could happen + // when a call to SET Xnack mode fails or user has no particular + // preference + status = hsaKmtGetXNACKMode(&mode); + if (status != HSAKMT_STATUS_SUCCESS) { + debug_print( + "KFD does not support xnack mode query.\nROCr must assume " + "xnack is disabled.\n"); + return false; + } + return (mode != Flag::XNACK_DISABLE); +} + } // namespace AMD } // namespace rocr diff --git a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp index b684b69f9..d9b8c6b3d 100644 --- a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp +++ b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -61,23 +61,19 @@ namespace AMD { XdnaDriver::XdnaDriver(std::string devnode_name) : core::Driver(core::DriverType::XDNA, devnode_name) {} -XdnaDriver::~XdnaDriver() { FreeDeviceHeap(); } - -hsa_status_t XdnaDriver::DiscoverDriver() { +hsa_status_t XdnaDriver::DiscoverDriver(std::unique_ptr& driver) { const int max_minor_num(64); const std::string devnode_prefix("/dev/accel/accel"); for (int i = 0; i < max_minor_num; ++i) { - std::unique_ptr xdna_drv( - new XdnaDriver(devnode_prefix + std::to_string(i))); - if (xdna_drv->Open() == HSA_STATUS_SUCCESS) { - if (xdna_drv->QueryKernelModeDriver( - core::DriverQuery::GET_DRIVER_VERSION) == HSA_STATUS_SUCCESS) { - static_cast(xdna_drv.get())->Init(); - core::Runtime::runtime_singleton_->RegisterDriver(xdna_drv); + auto tmp_driver = std::unique_ptr(new 
XdnaDriver(devnode_prefix + std::to_string(i))); + if (tmp_driver->Open() == HSA_STATUS_SUCCESS) { + if (tmp_driver->QueryKernelModeDriver(core::DriverQuery::GET_DRIVER_VERSION) == + HSA_STATUS_SUCCESS) { + driver = std::move(tmp_driver); return HSA_STATUS_SUCCESS; } else { - xdna_drv->Close(); + tmp_driver->Close(); } } } @@ -91,6 +87,8 @@ uint64_t XdnaDriver::GetDevHeapByteSize() { hsa_status_t XdnaDriver::Init() { return InitDeviceHeap(); } +hsa_status_t XdnaDriver::ShutDown() { return FreeDeviceHeap(); } + hsa_status_t XdnaDriver::QueryKernelModeDriver(core::DriverQuery query) { switch (query) { case core::DriverQuery::GET_DRIVER_VERSION: @@ -121,6 +119,25 @@ hsa_status_t XdnaDriver::Close() { return HSA_STATUS_SUCCESS; } +hsa_status_t XdnaDriver::GetSystemProperties(HsaSystemProperties& sys_props) const { + sys_props.NumNodes = 1; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const { + /// @todo XDNA driver currently only supports single-node AIE + /// devices over PCIe. Update this once we can get topology + /// information dynamically from the sysfs. 
+ node_props.NumNeuralCores = 1; + node_props.NumIOLinks = 0; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t XdnaDriver::GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const { + return HSA_STATUS_SUCCESS; +} + hsa_status_t XdnaDriver::GetAgentProperties(core::Agent &agent) const { if (agent.device_type() != core::Agent::DeviceType::kAmdAieDevice) { return HSA_STATUS_ERROR_INVALID_AGENT; @@ -284,8 +301,8 @@ hsa_status_t XdnaDriver::QueryDriverVersion() { return HSA_STATUS_ERROR; } - version_.major = aie_version.major; - version_.minor = aie_version.minor; + version_.KernelInterfaceMajorVersion = aie_version.major; + version_.KernelInterfaceMinorVersion = aie_version.minor; return HSA_STATUS_SUCCESS; } diff --git a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h index c0b14db26..270146c64 100644 --- a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h +++ b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h @@ -53,6 +53,8 @@ namespace rocr { namespace AMD { +class XdnaDriver; + /// @brief Encapsulates HW AIE AQL Command Processor functionality. It /// provides the interface for things such as doorbells, queue read and /// write pointers, and a buffer. diff --git a/runtime/hsa-runtime/core/inc/amd_available_drivers.h b/runtime/hsa-runtime/core/inc/amd_available_drivers.h new file mode 100644 index 000000000..2a0f6a955 --- /dev/null +++ b/runtime/hsa-runtime/core/inc/amd_available_drivers.h @@ -0,0 +1,53 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HSA_RUNTIME_CORE_INC_AMD_AVAILABLE_DRIVERS_H_ +#define HSA_RUNTIME_CORE_INC_AMD_AVAILABLE_DRIVERS_H_ + +#ifdef __linux__ + +#include "core/inc/amd_kfd_driver.h" +#include "core/inc/amd_xdna_driver.h" + +#endif + +#endif // HSA_RUNTIME_CORE_INC_AMD_AVAILABLE_DRIVERS_H_ diff --git a/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/runtime/hsa-runtime/core/inc/amd_kfd_driver.h index ee53995a6..cd634a92b 100644 --- a/runtime/hsa-runtime/core/inc/amd_kfd_driver.h +++ b/runtime/hsa-runtime/core/inc/amd_kfd_driver.h @@ -43,6 +43,7 @@ #ifndef HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_ #define HSA_RUNTIME_CORE_INC_AMD_KFD_DRIVER_H_ +#include #include #include "hsakmt/hsakmt.h" @@ -64,12 +65,17 @@ class KfdDriver final : public core::Driver { public: KfdDriver(std::string devnode_name); - static hsa_status_t DiscoverDriver(); + static hsa_status_t DiscoverDriver(std::unique_ptr& driver); hsa_status_t Init() override; + hsa_status_t ShutDown() override; hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override; hsa_status_t Open() override; hsa_status_t Close() override; + hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override; + hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override; + hsa_status_t GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const override; hsa_status_t GetAgentProperties(core::Agent &agent) const override; hsa_status_t GetMemoryProperties(uint32_t node_id, @@ -98,6 +104,17 @@ class KfdDriver final : public core::Driver { /// @brief Unpin memory. static void MakeKfdMemoryUnresident(const void *mem); + + /// @brief Query for user preference and use that to determine Xnack mode + /// of ROCm system. Return true if Xnack mode is ON or false if OFF. Xnack + /// mode of a system is orthogonal to devices that do not support Xnack mode. 
+ /// It is legal for a system with Xnack ON to have devices that do not support + /// Xnack functionality. + static bool BindXnackMode(); + + // Minimum acceptable KFD version numbers. + static const uint32_t kfd_version_major_min = 0; + static const uint32_t kfd_version_minor_min = 99; }; } // namespace AMD diff --git a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h index feaa0588a..34af182d0 100644 --- a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h +++ b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h @@ -129,14 +129,15 @@ inline uint32_t GetOperandCount(uint32_t arg_count) { class XdnaDriver final : public core::Driver { public: XdnaDriver(std::string devnode_name); - ~XdnaDriver(); + ~XdnaDriver() = default; - static hsa_status_t DiscoverDriver(); + static hsa_status_t DiscoverDriver(std::unique_ptr& driver); /// @brief Returns the size of the dev heap in bytes. static uint64_t GetDevHeapByteSize(); hsa_status_t Init() override; + hsa_status_t ShutDown() override; hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override; std::unordered_map& GetHandleMappings(); @@ -144,6 +145,10 @@ class XdnaDriver final : public core::Driver { hsa_status_t Open() override; hsa_status_t Close() override; + hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const override; + hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const override; + hsa_status_t GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const override; hsa_status_t GetAgentProperties(core::Agent &agent) const override; hsa_status_t GetMemoryProperties(uint32_t node_id, diff --git a/runtime/hsa-runtime/core/inc/driver.h b/runtime/hsa-runtime/core/inc/driver.h index b621b8d6f..c998a410b 100644 --- a/runtime/hsa-runtime/core/inc/driver.h +++ b/runtime/hsa-runtime/core/inc/driver.h @@ -47,6 +47,7 @@ #include #include "core/inc/memory_region.h" +#include "hsakmt/hsakmttypes.h" #include 
"inc/hsa.h" namespace rocr { @@ -54,11 +55,6 @@ namespace core { class Queue; -struct DriverVersionInfo { - uint32_t major; - uint32_t minor; -}; - enum class DriverQuery { GET_DRIVER_VERSION }; enum class DriverType { XDNA = 0, KFD, NUM_DRIVER_TYPES }; @@ -77,6 +73,14 @@ class Driver { /// @brief Initialize the driver's state after opening. virtual hsa_status_t Init() = 0; + /// @brief Release the driver's resources and close the kernel-mode + /// driver. + virtual hsa_status_t ShutDown() = 0; + + /// @brief Get driver version information. + /// @retval DriverVersionInfo containing the driver's version information. + const HsaVersionInfo& Version() const { return version_; } + /// @brief Query the kernel-model driver. /// @retval HSA_STATUS_SUCCESS if the kernel-model driver query was /// successful. @@ -90,9 +94,19 @@ class Driver { /// @retval HSA_STATUS_SUCCESS if the driver was opened successfully. virtual hsa_status_t Close() = 0; - /// @brief Get driver version information. - /// @retval DriverVersionInfo containing the driver's version information. - const DriverVersionInfo &Version() const { return version_; } + /// @brief Get the system properties for nodes managed by this driver. + virtual hsa_status_t GetSystemProperties(HsaSystemProperties& sys_props) const = 0; + + /// @brief Get the properties for a specific node managed by this driver. + virtual hsa_status_t GetNodeProperties(HsaNodeProperties& node_props, uint32_t node_id) const = 0; + + /// @brief Get the edge (IO link) properties of a specific node (that is + /// managed by this driver) in the topology graph. + /// @param[out] io_link_props IO link properties of the node specified by \p + /// node_id. + /// @param[in] node_id ID of the node whose link properties are being queried. + virtual hsa_status_t GetEdgeProperties(std::vector& io_link_props, + uint32_t node_id) const = 0; /// @brief Get the properties of a specific agent and initialize the agent /// object. 
@@ -131,11 +145,11 @@ class Driver { const DriverType kernel_driver_type_; protected: - DriverVersionInfo version_{std::numeric_limits::max(), - std::numeric_limits::max()}; + HsaVersionInfo version_{std::numeric_limits::max(), + std::numeric_limits::max()}; - const std::string devnode_name_; - int fd_ = -1; + const std::string devnode_name_; + int fd_ = -1; }; } // namespace core diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index bb493924c..0057d7c81 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -63,8 +63,7 @@ #include "core/inc/hsa_ext_amd_impl.h" #include "core/inc/agent.h" -#include "core/inc/amd_kfd_driver.h" -#include "core/inc/amd_xdna_driver.h" +#include "core/inc/driver.h" #include "core/inc/exceptions.h" #include "core/inc/interrupt_signal.h" #include "core/inc/memory_region.h" @@ -159,7 +158,7 @@ class Runtime { /// @brief Insert agent into the driver list. /// @param [in] driver Unique pointer to the driver object. - void RegisterDriver(std::unique_ptr &driver); + void RegisterDriver(std::unique_ptr driver); /// @brief Delete all agent objects from ::agents_. 
void DestroyAgents(); @@ -494,6 +493,8 @@ class Runtime { return **driver; } + std::vector>& AgentDrivers() { return agent_drivers_; } + protected: static void AsyncEventsLoop(void*); static void AsyncIPCSockServerConnLoop(void*); diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp index fb823a683..ad63b9bac 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -55,6 +55,7 @@ #include +#include "core/inc/amd_xdna_driver.h" #include "core/inc/queue.h" #include "core/inc/runtime.h" #include "core/inc/signal.h" diff --git a/runtime/hsa-runtime/core/runtime/amd_topology.cpp b/runtime/hsa-runtime/core/runtime/amd_topology.cpp index 28856bf9f..10a310b48 100644 --- a/runtime/hsa-runtime/core/runtime/amd_topology.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_topology.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -41,24 +41,29 @@ //////////////////////////////////////////////////////////////////////////////// #include "core/inc/amd_topology.h" -#include "core/inc/amd_filter_device.h" #include #include -#include -#include -#include -#include -#include +#include -#ifndef NDBEUG +#ifndef NDEBUG #include #endif -#include "hsakmt/hsakmt.h" +#include +#include +#include +#include +#include +#include +#include + +#include #include "core/inc/amd_aie_agent.h" +#include "core/inc/amd_available_drivers.h" #include "core/inc/amd_cpu_agent.h" +#include "core/inc/amd_filter_device.h" #include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" #include "core/inc/runtime.h" @@ -68,58 +73,47 @@ extern r_debug _amdgpu_r_debug; namespace rocr { namespace AMD { -// Minimum acceptable KFD version numbers -static const uint kKfdVersionMajor = 0; -static const uint kKfdVersionMinor = 99; - -void DiscoverDrivers(bool &gpu_found, bool &aie_found) { - // Open connection to GPU and AIE kernel drivers. - gpu_found = (KfdDriver::DiscoverDriver() == HSA_STATUS_SUCCESS); - aie_found = (XdnaDriver::DiscoverDriver() == HSA_STATUS_SUCCESS); -} +// Anonymous namespace. +namespace { +#if _WIN32 +constexpr size_t num_drivers = 0; +#elif __linux__ +constexpr size_t num_drivers = 2; +#endif -// Query for user preference and use that to determine Xnack mode of ROCm system. -// Return true if Xnack mode is ON or false if OFF. Xnack mode of a system is -// orthogonal to devices that do not support Xnack mode. It is legal for a -// system with Xnack ON to have devices that do not support Xnack functionality. 
-bool BindXnackMode() { - // Get users' preference for Xnack mode of ROCm platform - HSAint32 mode; - mode = core::Runtime::runtime_singleton_->flag().xnack(); - bool config_xnack = - (core::Runtime::runtime_singleton_->flag().xnack() != Flag::XNACK_REQUEST::XNACK_UNCHANGED); - - // Indicate to driver users' preference for Xnack mode - // Call to driver can fail and is a supported feature - HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; - if (config_xnack) { - status = hsaKmtSetXNACKMode(mode); - if (status == HSAKMT_STATUS_SUCCESS) { - return mode; - } - } +const std::array&)>, num_drivers> + discover_driver_funcs = { +#ifdef __linux__ + KfdDriver::DiscoverDriver, XdnaDriver::DiscoverDriver +#endif +}; - // Get Xnack mode of devices bound by driver. This could happen - // when a call to SET Xnack mode fails or user has no particular - // preference - status = hsaKmtGetXNACKMode((HSAint32*)&mode); - if(status != HSAKMT_STATUS_SUCCESS) { - debug_print("KFD does not support xnack mode query.\nROCr must assume xnack is disabled.\n"); - return false; +void DiscoverDrivers() { + for (const auto& discover_driver_fn : discover_driver_funcs) { + std::unique_ptr driver; + hsa_status_t ret = discover_driver_fn(driver); + + if (ret != HSA_STATUS_SUCCESS) continue; + + core::Runtime::runtime_singleton_->RegisterDriver(std::move(driver)); } - return mode; } -CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { - if (node_prop.NumCPUCores == 0) { - return nullptr; +bool InitializeDriver(std::unique_ptr& driver) { + MAKE_NAMED_SCOPE_GUARD(driver_guard, [&]() { driver->Close(); }); + + if (driver->Init() != HSA_STATUS_SUCCESS) { + return false; } + driver_guard.Dismiss(); + return true; +} + +void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { CpuAgent* cpu = new CpuAgent(node_id, node_prop); cpu->Enable(); core::Runtime::runtime_singleton_->RegisterAgent(cpu, true); - - return cpu; } GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& 
node_prop, bool xnack_mode, @@ -178,21 +172,20 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac return gpu; } -AieAgent *DiscoverAie() { - AieAgent *aie = new AieAgent(0); +void DiscoverAie(uint32_t node_id, HsaNodeProperties& node_prop) { + AieAgent* aie = new AieAgent(node_id); core::Runtime::runtime_singleton_->RegisterAgent(aie, true); - return aie; } -void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { +void RegisterLinkInfo(const std::unique_ptr& driver, uint32_t node_id, + uint32_t num_link) { // Register connectivity links for this agent to the runtime. if (num_link == 0) { return; } std::vector links(num_link); - if (HSAKMT_STATUS_SUCCESS != - hsaKmtGetNodeIoLinkProperties(node_id, num_link, &links[0])) { + if (HSA_STATUS_SUCCESS != driver->GetEdgeProperties(links, node_id)) { return; } @@ -259,19 +252,20 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { /** * Process the list of Gpus that are surfaced to user */ -static void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode, bool enabled) { +void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode, bool enabled) { // Process user visible Gpu devices const int32_t invalidIdx = -1; int32_t list_sz = gpu_list.size(); HsaNodeProperties node_prop = {0}; + const auto& gpu_driver = core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD); for (int32_t idx = 0; idx < list_sz; idx++) { if (gpu_list[idx] == invalidIdx) { break; } // Obtain properties of the node - HSAKMT_STATUS err_val = hsaKmtGetNodeProperties(gpu_list[idx], &node_prop); - assert(err_val == HSAKMT_STATUS_SUCCESS && "Error in getting Node Properties"); + hsa_status_t ret = gpu_driver.GetNodeProperties(node_prop, gpu_list[idx]); + assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties"); // Instantiate a Gpu device. 
The IO links // of this node have already been registered @@ -280,116 +274,122 @@ static void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode, bool } } -/// @brief Calls Kfd thunk to get the snapshot of the topology of the system, -/// which includes associations between, node, devices, memory and caches. -void BuildTopology() { - HsaVersionInfo kfd_version; - if (hsaKmtGetVersion(&kfd_version) != HSAKMT_STATUS_SUCCESS) { - return; - } - - if (kfd_version.KernelInterfaceMajorVersion == kKfdVersionMajor && - kfd_version.KernelInterfaceMinorVersion < kKfdVersionMinor) { - return; - } - - // Disable KFD event support when using open source KFD - if (kfd_version.KernelInterfaceMajorVersion == 1 && - kfd_version.KernelInterfaceMinorVersion == 0) { - core::g_use_interrupt_wait = false; - } - - core::Runtime::runtime_singleton_->KfdVersion(kfd_version); - - HsaSystemProperties props; - hsaKmtReleaseSystemProperties(); - - if (hsaKmtAcquireSystemProperties(&props) != HSAKMT_STATUS_SUCCESS) { - return; - } - - core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); - - // Query if env ROCR_VISIBLE_DEVICES is defined. If defined - // determine number and order of GPU devices to be surfaced +/// @brief Calls into the user-mode driver for each node to build the topology +/// of the system. +/// +/// @details Topology information includes information about each node in the +/// topology graph, which includes agents, IO links, memory, and caches. +bool BuildTopology() { + auto rt = core::Runtime::runtime_singleton_; + std::unordered_map driver_sys_props; + size_t link_count = 0; + /// @todo Currently we can filter out GPU devices using the + /// ROCR_VISIBLE_DEVICES environment variable. Eventually this + /// should be updated to allow for filtering other agents like + /// AIEs. 
RvdFilter rvdFilter; int32_t invalidIdx = -1; uint32_t visibleCnt = 0; std::vector gpu_usr_list; std::vector gpu_disabled; bool filter = RvdFilter::FilterDevices(); - if (filter) { - rvdFilter.BuildRvdTokenList(); - rvdFilter.BuildDeviceUuidList(props.NumNodes); - visibleCnt = rvdFilter.BuildUsrDeviceList(); - for (int32_t idx = 0; idx < visibleCnt; idx++) { - gpu_usr_list.push_back(invalidIdx); - } + + // Get the system properties (i.e., node count) from each driver + // then update the runtime's link count before traversing each + // driver's individual nodes. + for (const auto& driver : rt->AgentDrivers()) { + driver->GetSystemProperties(driver_sys_props[driver->kernel_driver_type_]); + + if (!driver_sys_props[driver->kernel_driver_type_].NumNodes) continue; + + link_count += driver_sys_props[driver->kernel_driver_type_].NumNodes; } - // Discover agents on every node in the platform. - int32_t kfdIdx = 0; - for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) { - HsaNodeProperties node_prop = {0}; - if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) { - continue; + rt->SetLinkCount(link_count); + + // Traverse each driver's nodes and discover their agents. + for (const auto& driver : core::Runtime::runtime_singleton_->AgentDrivers()) { + if (driver_sys_props.find(driver->kernel_driver_type_) == driver_sys_props.end()) return false; + + const HsaSystemProperties& sys_props = driver_sys_props[driver->kernel_driver_type_]; + + // Query if env ROCR_VISIBLE_DEVICES is defined. If defined + // determine number and order of GPU devices to be surfaced. 
+ if (filter && driver->kernel_driver_type_ == core::DriverType::KFD) { + rvdFilter.BuildRvdTokenList(); + rvdFilter.BuildDeviceUuidList(sys_props.NumNodes); + visibleCnt = rvdFilter.BuildUsrDeviceList(); + for (int32_t idx = 0; idx < visibleCnt; idx++) { + gpu_usr_list.push_back(invalidIdx); + } } - // Instantiate a Cpu device - const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); - assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery."); - - // Current node is either a dGpu or Apu and might belong - // to user visible list. Process node if present in usr - // visible list, continue if not found - if (node_prop.NumFComputeCores != 0) { - if (filter) { - int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx); - if (devRank != (-1)) { - gpu_usr_list[devRank] = node_id; + // Discover agents on every node in the platform. + int32_t kfdIdx = 0; + for (HSAuint32 node_id = 0; node_id < sys_props.NumNodes; node_id++) { + HsaNodeProperties node_props = {0}; + if (driver->GetNodeProperties(node_props, node_id) != HSA_STATUS_SUCCESS) { + return false; + } + + if (node_props.NumCPUCores) { + // Node has CPU cores so instantiate a CPU agent. + DiscoverCpu(node_id, node_props); + } + + if (node_props.NumNeuralCores) { + // Node has AIE cores so instantiate an AIE agent. + DiscoverAie(node_id, node_props); + } + + // Current node is either a dGpu or Apu and might belong + // to user visible list. Process node if present in usr + // visible list, continue if not found + if (node_props.NumFComputeCores != 0) { + if (filter) { + int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx); + if (devRank != (-1)) { + gpu_usr_list[devRank] = node_id; + } else { + gpu_disabled.push_back(node_id); + } } else { - gpu_disabled.push_back(node_id); + gpu_usr_list.push_back(node_id); } - } else { - gpu_usr_list.push_back(node_id); + kfdIdx++; } - kfdIdx++; - } - // Register IO links of node without regard to - // it being visible to user or not. 
It is not - // possible to access links of nodes that are - // not visible - RegisterLinkInfo(node_id, node_prop.NumIOLinks); + // Register IO links of node without regard to + // it being visible to user or not. It is not + // possible to access links of nodes that are + // not visible + RegisterLinkInfo(driver, node_id, node_props.NumIOLinks); + } } - // Determine the Xnack mode to be bound for system - bool xnack_mode = BindXnackMode(); - core::Runtime::runtime_singleton_->XnackEnabled(xnack_mode); - // Instantiate ROCr objects to encapsulate Gpu devices - SurfaceGpuList(gpu_usr_list, xnack_mode, true); - SurfaceGpuList(gpu_disabled, xnack_mode, false); + SurfaceGpuList(gpu_usr_list, rt->XnackEnabled(), true); + SurfaceGpuList(gpu_disabled, rt->XnackEnabled(), false); // Parse HSA_CU_MASK with GPU and CU count limits. - uint32_t maxGpu = core::Runtime::runtime_singleton_->gpu_agents().size(); + uint32_t maxGpu = rt->gpu_agents().size(); uint32_t maxCu = 0; uint32_t cus; - for (auto& gpu : core::Runtime::runtime_singleton_->gpu_agents()) { + for (auto& gpu : rt->gpu_agents()) { gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cus); maxCu = Max(maxCu, cus); } - const_cast(core::Runtime::runtime_singleton_->flag()).parse_masks(maxGpu, maxCu); + const_cast(rt->flag()).parse_masks(maxGpu, maxCu); // Register destination agents that can SDMA gang copy for source agents - for (auto& src_gpu : core::Runtime::runtime_singleton_->gpu_agents()) { + for (auto& src_gpu : rt->gpu_agents()) { uint32_t src_id = src_gpu->node_id(); - for (auto& dst_gpu : core::Runtime::runtime_singleton_->gpu_agents()) { + for (auto& dst_gpu : rt->gpu_agents()) { uint32_t dst_id = dst_gpu->node_id(); uint32_t gang_factor = 1, rec_sdma_eng_id_mask = 0; if (src_id != dst_id) { - auto linfo = core::Runtime::runtime_singleton_->GetLinkInfo(src_id, dst_id); + auto linfo = rt->GetLinkInfo(src_id, dst_id); // Ganging can only be done over xGMI and is either fixed or variable // 
based on topology information: // Weight of 13 - Intra-socket GPU link in multi-partition mode @@ -398,7 +398,7 @@ void BuildTopology() { if (linfo.info.link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) { // Temporary work-around, disable SDMA ganging on non-APUs in non-SPX modes // Check xGMI APU status - const bool isXgmiApu = reinterpret_cast(src_gpu)->is_xgmi_cpu_gpu(); + const bool isXgmiApu = static_cast(src_gpu)->is_xgmi_cpu_gpu(); if (linfo.info.numa_distance == 13 || linfo.info.numa_distance == 41) gang_factor = isXgmiApu ? 2 : 1; else if (linfo.info.numa_distance == 15 && linfo.info.min_bandwidth) @@ -415,48 +415,27 @@ void BuildTopology() { ((AMD::GpuAgent*)src_gpu)->RegisterRecSdmaEngIdMaskPeer(*dst_gpu, rec_sdma_eng_id_mask); } } + return true; } +} // Anonymous namespace bool Load() { - bool gpu_found = false; - bool aie_found = false; + DiscoverDrivers(); - DiscoverDrivers(gpu_found, aie_found); + if (core::Runtime::runtime_singleton_->AgentDrivers().empty()) return false; - if (!(gpu_found || aie_found)) { - return false; + for (auto& d : core::Runtime::runtime_singleton_->AgentDrivers()) { + if (!InitializeDriver(d)) return false; } - if (gpu_found) { - MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); }); - - // Build topology table. 
- BuildTopology(); - - HSAKMT_STATUS err = hsaKmtRuntimeEnable( - &_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); - if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) - return false; - HSAuint32 caps_mask; - hsaKmtGetRuntimeCapabilities(&caps_mask); - core::Runtime::runtime_singleton_->KfdVersion( - err != HSAKMT_STATUS_NOT_SUPPORTED, - !!(caps_mask & HSA_RUNTIME_ENABLE_CAPS_SUPPORTS_CORE_DUMP_MASK)); - - kfd.Dismiss(); - } - - if (aie_found) { - DiscoverAie(); - } - - return true; + return BuildTopology(); } bool Unload() { - hsaKmtRuntimeDisable(); - - hsaKmtReleaseSystemProperties(); + for (auto& driver : core::Runtime::runtime_singleton_->AgentDrivers()) { + hsa_status_t ret = driver->ShutDown(); + if (ret != HSA_STATUS_SUCCESS) return false; + } return true; } diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 1471261f0..37af826d4 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -230,7 +230,7 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { } // Register driver. -void Runtime::RegisterDriver(std::unique_ptr &driver) { +void Runtime::RegisterDriver(std::unique_ptr driver) { agent_drivers_.push_back(std::move(driver)); } @@ -258,10 +258,6 @@ void Runtime::DestroyAgents() { } void Runtime::DestroyDrivers() { - for (auto &d : agent_drivers_) { - d->Close(); - } - agent_drivers_.clear(); } @@ -2136,9 +2132,9 @@ void Runtime::Unload() { CloseTools(); - DestroyDrivers(); - AMD::Unload(); + + DestroyDrivers(); } void Runtime::LoadExtensions() {