intel · bader · Aug 20, 2021 · Jul 8, 2021 · Jul 23, 2021 · Aug 19, 2021
@@ -424,6 +424,11 @@ skipped.
 If CUDA support has been built, it is tested only if there are CUDA devices
 available.
 
+If testing with ROCm for AMD make sure to specify the GPU being used
+by adding `-Xsycl-target-backend=amdgcn-amd-amdhsa-sycldevice
+--offload-arch=<target>` to the CMake variable
+`SYCL_CLANG_EXTRA_FLAGS`.
+
 #### Run DPC++ E2E test suite
 
 Follow instructions from the link below to build and run tests:

@@ -75,6 +75,34 @@ if(SYCL_BUILD_PI_CUDA)
   add_custom_target(check-sycl-cuda)
   add_dependencies(check-sycl-cuda check-sycl-ptx)
   add_dependencies(check-sycl check-sycl-cuda)
+endif()
+
+if(SYCL_BUILD_PI_ROCM)
+  add_custom_target(check-sycl-rocm)
+  if("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "NVIDIA")
+    add_lit_testsuite(check-sycl-rocm-ptx "Running device-agnostic SYCL regression tests for ROCm NVidia PTX"
+      ${CMAKE_CURRENT_BINARY_DIR}
+      ARGS ${RT_TEST_ARGS}
+      PARAMS "SYCL_TRIPLE=nvptx64-nvidia-cuda-sycldevice;SYCL_PLUGIN=rocm"
+      DEPENDS ${SYCL_TEST_DEPS}
+      EXCLUDE_FROM_CHECK_ALL
+    )
+
+    add_dependencies(check-sycl-rocm check-sycl-rocm-ptx)
+  elseif("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "AMD")
+    add_lit_testsuite(check-sycl-rocm-gcn "Running device-agnostic SYCL regression tests for ROCm AMDGCN"
+      ${CMAKE_CURRENT_BINARY_DIR}
+      ARGS ${RT_TEST_ARGS}
+      PARAMS "SYCL_TRIPLE=amdgcn-amd-amdhsa-sycldevice;SYCL_PLUGIN=rocm"
+      DEPENDS ${SYCL_TEST_DEPS}
+      EXCLUDE_FROM_CHECK_ALL
+    )
+
+    add_dependencies(check-sycl-rocm check-sycl-rocm-gcn)
+  else()
+    message(FATAL_ERROR "SYCL_BUILD_PI_ROCM_PLATFORM must be set to either 'AMD' or 'NVIDIA' (set to: '${SYCL_BUILD_PI_ROCM_PLATFORM}')")
+  endif()
 
+  add_dependencies(check-sycl check-sycl-rocm)
 endif()
 add_subdirectory(on-device)
@@ -3,6 +3,10 @@
 
 // CUDA does not support printf.
 // UNSUPPORTED: cuda
+//
+// Hits an assertion with AMD:
+// XFAIL: rocm_amd
+
 #include <CL/sycl.hpp>
 
 #include <cassert>

@@ -11,6 +11,9 @@
 // Cuda does not support intrinsics generated by the ESIMD compilation path:
 // UNSUPPORTED: cuda
 //
+// Linking issues with AMD:
+// XFAIL: rocm_amd
+
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <sycl/ext/intel/experimental/esimd.hpp>

@@ -1,6 +1,10 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
 // Group operations are not supported on host device. The test checks that
 // compilation succeeded.
+//
+// Missing __spirv_GroupIAdd, __spirv_GroupAll, __spirv_GroupBroadcast,
+// __spirv_GroupAny, __spirv_GroupSMin on AMD:
+// XFAIL: rocm_amd
 
 // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
 // That requires either adding a switch to clang (-spirv-max-version=1.3) or

@@ -103,6 +103,15 @@
 if triple == 'nvptx64-nvidia-cuda-sycldevice':
     config.available_features.add('cuda')
 
+if triple == 'amdgcn-amd-amdhsa-sycldevice':
+    config.available_features.add('rocm_amd')
+    # For AMD the specific GPU has to be specified with --offload-arch
+    if not re.match('.*--offload-arch.*', config.sycl_clang_extra_flags):
+        raise Exception("Error: missing --offload-arch flag when trying to "  \
+                        "run lit tests for AMD GPU, please add "              \
+                        "`-Xsycl-target-backend=amdgcn-amd-amdhsa-sycldevice --offload-arch=<target>` to " \
+                        "the CMake variable SYCL_CLANG_EXTRA_FLAGS")
+
 # Set timeout for test = 10 mins
 try:
     import psutil

@@ -40,3 +40,17 @@ if(SYCL_BUILD_PI_CUDA)
     add_dependencies(check-sycl-cuda check-sycl-cuda-on-device)
   endif()
 endif()
+
+if(SYCL_BUILD_PI_ROCM)
+  add_lit_testsuite(check-sycl-rocm-on-device "Running the SYCL regression tests for ROCm"
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ARGS ${RT_TEST_ARGS}
+    PARAMS "SYCL_PLUGIN=rocm"
+    DEPENDS ${SYCL_TEST_DEPS}
+    EXCLUDE_FROM_CHECK_ALL
+  )
+  set_target_properties(check-sycl-rocm-on-device PROPERTIES FOLDER "SYCL ROCm device tests")
+  if(TARGET check-sycl-rocm)
+    add_dependencies(check-sycl-rocm check-sycl-rocm-on-device)
+  endif()
+endif()
@@ -3,6 +3,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupIAdd on AMD:
+// XFAIL: rocm_amd
 
 #include <CL/sycl.hpp>
 #include <numeric>

@@ -1,5 +1,9 @@
 // RUN: %clangxx -fsycl %s -o %t.out
 // RUN: env SYCL_DEVICE_FILTER=%sycl_be %t.out
+//
+// Hip is missing some of the parameters tested here so it fails with ROCm for
+// NVIDIA
+// XFAIL: rocm_nvidia
 
 //==--------------- aspects.cpp - SYCL device test ------------------------==//
 //

@@ -2,6 +2,8 @@
 // RUN: %t.out
 
 // UNSUPPORTED: cuda
+// UNSUPPORTED: rocm_nvidia
+// UNSUPPORTED: rocm_amd
 
 #include <sycl/sycl.hpp>
 

@@ -1,5 +1,8 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
+//
+// Hits an assert in the Lower Work Group Scope Code pass on AMD:
+// XFAIL: rocm_amd
 
 // This test checks all possible scenarios of running single_task, parallel_for
 // and parallel_for_work_group to verify that this code compiles and runs

@@ -1,5 +1,8 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
+//
+// Hits an assert in the Lower Work Group Scope Code pass on AMD:
+// XFAIL: rocm_amd
 
 // This test checks correctness of compiling and running of application with
 // kernel lambdas containing kernel_handler arguments and w/o usage of

@@ -1,6 +1,10 @@
 // REQUIRES: ocloc, gpu, TEMPORARY_DISABLED
 // UNSUPPORTED: cuda
 // CUDA is not compatible with SPIR.
+//
+// UNSUPPORTED: rocm_nvidia
+// UNSUPPORTED: rocm_amd
+// ROCm is not compatible with SPIR.
 
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/common.cpp -o %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out

@@ -4,6 +4,8 @@
 //
 // REQUIRES: gpu
 // UNSUPPORTED: cuda
+// UNSUPPORTED: rocm_nvidia
+// UNSUPPORTED: rocm_amd
 
 //==--------- intel-ext-device.cpp - SYCL device test ------------==//
 //

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupAll on AMD:
+// XFAIL: rocm_amd
 
 #include "support.h"
 #include <CL/sycl.hpp>

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupAny on AMD:
+// XFAIL: rocm_amd
 
 #include "support.h"
 #include <CL/sycl.hpp>

@@ -2,6 +2,10 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupIAdd, __spirv_GroupBroadcast, __spirv_GroupSMin and
+// __spirv_GroupSMax on AMD:
+// XFAIL: rocm_amd
 
 // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
 // That requires either adding a switch to clang (-spirv-max-version=1.3) or

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupBroadcast on AMD:
+// XFAIL: rocm_amd
 
 #include "support.h"
 #include <CL/sycl.hpp>

@@ -2,6 +2,10 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupIAdd, __spirv_GroupBroadcast, __spirv_GroupSMin and
+// __spirv_GroupSMax on AMD:
+// XFAIL: rocm_amd
 
 // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
 // That requires either adding a switch to clang (-spirv-max-version=1.3) or

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missing __spirv_GroupAll and __spirv_GroupAny on AMD:
+// XFAIL: rocm_amd
 
 #include "support.h"
 #include <CL/sycl.hpp>

@@ -3,6 +3,9 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
+// Missing __spirv_SubgroupId, __spirv_SubgroupMaxSize, __spirv_SubgroupShuffle* on AMD:
+// XFAIL: rocm_amd
+//
 //==------------ permute_select.cpp -*- C++ -*-----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Missinsg __spirv_GroupIAdd, __spirv_GroupSMin and __spirv_GroupSMax on AMD:
+// XFAIL: rocm_amd
 
 // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
 // That requires either adding a switch to clang (-spirv-max-version=1.3) or

@@ -3,6 +3,9 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 //
+// Missing __spirv_SubgroupId, __spirv_SubgroupMaxSize, __spirv_SubgroupShuffle* on AMD:
+// XFAIL: rocm_amd
+//
 //==------------ shift_left_right.cpp -*- C++ -*----------------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

@@ -105,6 +105,8 @@
 def getDeviceCount(device_type):
     is_cuda = False;
     is_level_zero = False;
+    is_rocm_amd = False;
+    is_rocm_nvidia = False;
     process = subprocess.Popen([get_device_count_by_type_path, device_type, backend],
         stdout=subprocess.PIPE)
     (output, err) = process.communicate()
@@ -130,11 +132,15 @@ def getDeviceCount(device_type):
             is_cuda = True;
         if re.match(r".*level zero", result[1]):
             is_level_zero = True;
+        if re.match(r".*rocm-amd", result[1]):
+            is_rocm_amd = True;
+        if re.match(r".*rocm-nvidia", result[1]):
+            is_rocm_nvidia = True;
 
     if err:
         lit_config.warning("getDeviceCount {TYPE} {BACKEND} stderr:{ERR}".format(
             TYPE=device_type, BACKEND=backend, ERR=err))
-    return [value,is_cuda,is_level_zero]
+    return [value,is_cuda,is_level_zero,is_rocm_amd,is_rocm_nvidia]
 
 # check if compiler supports CL command line options
 cl_options=False
@@ -220,7 +226,9 @@ def getDeviceCount(device_type):
 
 cuda = False
 level_zero = False
-[gpu_count, cuda, level_zero] = getDeviceCount("gpu")
+rocm_amd = False
+rocm_nvidia = False
+[gpu_count, cuda, level_zero, rocm_amd, rocm_nvidia] = getDeviceCount("gpu")
 
 if gpu_count > 0:
     found_at_least_one_device = True
@@ -232,6 +240,16 @@ def getDeviceCount(device_type):
        config.available_features.add('cuda')
     elif level_zero:
        config.available_features.add('level_zero')
+    elif rocm_amd:
+       config.available_features.add('rocm_amd')
+       # For AMD the specific GPU has to be specified with --offload-arch
+       if not re.match('.*--offload-arch.*', config.sycl_clang_extra_flags):
+        raise Exception("Error: missing --offload-arch flag when trying to "  \
+                        "run lit tests for AMD GPU, please add "              \
+                        "`-Xsycl-target-backend=amdgcn-amd-amdhsa-sycldevice --offload-arch=<target>` to " \
+                        "the CMake variable SYCL_CLANG_EXTRA_FLAGS")
+    elif rocm_nvidia:
+       config.available_features.add('rocm_nvidia')
 
     if platform.system() == "Linux":
         gpu_run_on_linux_substitute = "env SYCL_DEVICE_FILTER={SYCL_PLUGIN}:gpu,host ".format(SYCL_PLUGIN=backend)
@@ -261,8 +279,10 @@ def getDeviceCount(device_type):
 if not cuda and not level_zero and found_at_least_one_device:
     config.available_features.add('opencl')
 
-if cuda:
+if cuda or rocm_nvidia:
     config.substitutions.append( ('%sycl_triple',  "nvptx64-nvidia-cuda-sycldevice" ) )
+elif rocm_amd:
+    config.substitutions.append( ('%sycl_triple',  "amdgcn-amd-amdhsa-sycldevice" ) )
 else:
     config.substitutions.append( ('%sycl_triple',  "spir64-unknown-unknown-sycldevice" ) )
 

@@ -2,6 +2,9 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
+//
+// Fails to release USM pointer on ROCm for NVIDIA
+// XFAIL: rocm_nvidia
 
 #include <numeric>
 #include <sycl/sycl.hpp>

@@ -4,6 +4,8 @@
 
 // XFAIL: level_zero
 // UNSUPPORTED: cuda
+// UNSUPPORTED: rocm_nvidia
+// UNSUPPORTED: rocm_amd
 
 #include <CL/sycl.hpp>
 

@@ -70,6 +70,11 @@ TEST(KernelBundle, GetKernelBundleFromKernel) {
     return;
   }
 
+  if (Plt.get_backend() == sycl::backend::rocm) {
+    std::cout << "Test is not supported on ROCm platform, skipping\n";
+    return;
+  }
+
   sycl::unittest::PiMock Mock{Plt};
   setupDefaultMockAPIs(Mock);
 

@@ -82,6 +82,11 @@ TEST(SpecConstDefaultValues, DefaultValuesAreSet) {
     return;
   }
 
+  if (Plt.get_backend() == sycl::backend::rocm) {
+    std::cerr << "Test is not supported on ROCm platform, skipping\n";
+    return;
+  }
+
   sycl::unittest::PiMock Mock{Plt};
   setupDefaultMockAPIs(Mock);
 
@@ -116,6 +121,11 @@ TEST(SpecConstDefaultValues, DefaultValuesAreOverriden) {
     return;
   }
 
+  if (Plt.get_backend() == sycl::backend::rocm) {
+    std::cerr << "Test is not supported on ROCm platform, skipping\n";
+    return;
+  }
+
   sycl::unittest::PiMock Mock{Plt};
   setupDefaultMockAPIs(Mock);