GPU: Add Device::deviceVendor() function and fix #3416 (#3418)

## Summary

This adds a function that returns the GPU's vendor.

## Additional background

This function is only really relevant for SYCL (although, AFAIK, only Intel GPUs are officially supported with the SYCL backend). For CUDA and HIP, the vendor is inferred at compile time.

This function is used to disable SIGSEGV handling by default on Intel GPUs to fix #3416. In theory, we could disable SIGSEGV handling by default only for specific Intel GPU architectures (e.g. using the experimental [oneAPI SYCL Device Architecture extension](https://github.com/intel/llvm/blob/48be219e238ca2354a1e9e9989752aa7a60b5627/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc)), since integrated Intel GPUs are not affected by #3416, but this would need to be updated every time Intel releases a new GPU architecture that still uses SIGSEGV for managed memory. It is simpler to disable it by default on all Intel GPUs.
AMReX-Codes · Jul 12, 2023 · 59a3106 · 59a3106
1 parent 6dfdd48
commit 59a3106
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 5 deletions.
diff --git a/Src/Base/AMReX.cpp b/Src/Base/AMReX.cpp
@@ -494,11 +494,10 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
         pp.queryAdd("abort_on_unused_inputs", system::abort_on_unused_inputs);
 
 #ifdef AMREX_USE_SYCL
-        // Disable SIGSEGV handling by default for certain Intel GPUs,
-        // because it is currently used by their managed memory
-        // implementation.
-        if (Gpu::Device::deviceName().find("[0x0bd6]") != std::string::npos || // PVC
-            Gpu::Device::deviceName().find("[0x020f]") != std::string::npos) { // ATS
+        // Disable SIGSEGV handling by default for Intel GPUs, because it is
+        // currently used by their managed memory implementation with discrete
+        // GPUs
+        if (Gpu::Device::deviceVendor().find("Intel") != std::string::npos) {
             system::handle_sigsegv = 0;
         }
 #endif

diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H
@@ -27,6 +27,7 @@ using gpuDeviceProp_t = cudaDeviceProp;
 #elif defined(AMREX_USE_SYCL)
     struct gpuDeviceProp_t {
         std::string name;
+        std::string vendor; // SYCL only (inferred for CUDA and HIP)
         std::size_t totalGlobalMem;
         std::size_t sharedMemPerBlock;
         int multiProcessorCount;
@@ -141,6 +142,20 @@ public:
     static int devicePropMinor () noexcept { return device_prop.minor; }
 #endif
 
+    static std::string deviceVendor() noexcept // Returns the GPU vendor name as a string; which branch is compiled depends on the backend macros below.
+    {
+#if defined(AMREX_USE_HIP) && defined(__HIP_PLATFORM_AMD__) // HIP targeting the AMD platform: vendor is fixed at compile time
+        return std::string("AMD");
+#elif defined(AMREX_USE_CUDA) || (defined(AMREX_USE_HIP) && defined(__HIP_PLATFORM_NVIDIA__)) // CUDA, or HIP targeting the NVIDIA platform: vendor is fixed at compile time
+        // Using HIP on NVIDIA GPUs isn't currently supported by AMReX
+        return std::string("NVIDIA");
+#elif defined(AMREX_USE_SYCL) // SYCL: vendor is a run-time value read from the device properties
+        return device_prop.vendor; // copy of the string that Device::initialize_gpu fills from sycl::info::device::vendor
+#else
+        return std::string("Unknown"); // none of the backend conditions above matched
+#endif
+    }
+
     static std::size_t freeMemAvailable ();
     static void profilerStart ();
     static void profilerStop ();

diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp
@@ -467,6 +467,7 @@ Device::initialize_gpu ()
     { // device property
         auto const& d = *sycl_device;
         device_prop.name = d.get_info<sycl::info::device::name>();
+        device_prop.vendor = d.get_info<sycl::info::device::vendor>();
         device_prop.totalGlobalMem = d.get_info<sycl::info::device::global_mem_size>();
         device_prop.sharedMemPerBlock = d.get_info<sycl::info::device::local_mem_size>();
         device_prop.multiProcessorCount = d.get_info<sycl::info::device::max_compute_units>();
@@ -489,6 +490,7 @@ Device::initialize_gpu ()
         {
             amrex::Print() << "Device Properties:\n"
                            << "  name: " << device_prop.name << "\n"
+                           << "  vendor: " << device_prop.vendor << "\n"
                            << "  totalGlobalMem: " << device_prop.totalGlobalMem << "\n"
                            << "  sharedMemPerBlock: " << device_prop.sharedMemPerBlock << "\n"
                            << "  multiProcessorCount: " << device_prop.multiProcessorCount << "\n"