
Move cuda check into utils (#3074)
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
loadams and jeffra authored Mar 24, 2023
1 parent 090d49e commit b3ec1c9
Showing 2 changed files with 22 additions and 17 deletions.
27 changes: 10 additions & 17 deletions tests/unit/ops/sparse_attention/test_sparse_attention.py
@@ -10,6 +10,7 @@
 import deepspeed
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import SparseAttnBuilder
+from unit.util import skip_on_arch, skip_on_cuda

 if not deepspeed.ops.__compatible_ops__[SparseAttnBuilder.NAME]:
     pytest.skip("sparse attention op is not compatible on this system",
@@ -130,26 +131,14 @@ def init_softmax_inputs(Z, H, M, N, scale, rho, block, dtype, dense_x=True, layo
     return layout, x, dx, bool_attn_mask, fp_attn_mask, kp_mask


-def _skip_on_cuda_compatability():
-    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
-        if torch.cuda.get_device_capability()[0] < 7:
-            pytest.skip("needs higher compute capability than 7")
-        cuda_major = int(torch.version.cuda.split('.')[0]) * 10
-        cuda_minor = int(torch.version.cuda.split('.')[1])
-        cuda_version = cuda_major + cuda_minor
-        if (cuda_version != 101 and cuda_version != 102) and \
-           (cuda_version != 111 and cuda_version != 110):
-            pytest.skip("requires cuda 10.1 or 10.2 or 11.0 or 11.1")
-    else:
-        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
-    return
-
-
 @pytest.mark.parametrize("block", [16, 32])
 @pytest.mark.parametrize("width", [256, 576])
 @pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
 def test_softmax(block, width, dtype):
-    _skip_on_cuda_compatability()
+    valid_cuda_versions = [101, 102, 110, 111]
+    skip_on_arch(min_arch=7)
+    skip_on_cuda(valid_cuda=valid_cuda_versions)
+
     Z = 2
     H = 4
     scale = 0.4
@@ -256,7 +245,10 @@ def init_matmul_inputs(Z, H, M, N, K, rho, mode, trans_a, trans_b, block, dtype,

@pytest.mark.parametrize("block, dtype, mode, trans_a, trans_b", testdata)
def test_matmul(block, dtype, mode, trans_a, trans_b):
_skip_on_cuda_compatability()
valid_cuda_versions = [101, 102, 110, 111]
skip_on_arch(min_arch=7)
skip_on_cuda(valid_cuda=valid_cuda_versions)

Z = 3
H = 2
M = 128
@@ -266,6 +258,7 @@ def test_matmul(block, dtype, mode, trans_a, trans_b):
     x, w, dy, shape, layout = init_matmul_inputs(Z, H, M, N, K, rho, mode, trans_a, trans_b, block, dtype, layout=None)
     ref_y, ref_dx, ref_dw = run_matmul_reference(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
     st_y, st_dx, st_dw = run_matmul_sparse(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
+
     assert allclose(ref_y, st_y)
     assert allclose(ref_dx, st_dx)
     assert allclose(ref_dw, st_dw)
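
Both tests now open with the same two-line guard. For illustration only, a minimal sketch of how another test in this suite could apply it; the test name and body below are hypothetical and not part of this commit:

    # Illustrative only: a hypothetical test reusing the shared skip helpers.
    import pytest
    import torch
    from unit.util import skip_on_arch, skip_on_cuda


    @pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
    def test_example(dtype):
        # Skip unless the accelerator meets the same requirements as the
        # sparse-attention kernels: compute capability >= 7 and one of the
        # supported CUDA toolkits (10.1, 10.2, 11.0, 11.1).
        skip_on_arch(min_arch=7)
        skip_on_cuda(valid_cuda=[101, 102, 110, 111])
        ...  # test body would go here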
12 changes: 12 additions & 0 deletions tests/unit/util.py
@@ -15,6 +15,18 @@ def skip_on_arch(min_arch=7):
     return


+def skip_on_cuda(valid_cuda):
+    split_version = lambda x: map(int, x.split('.')[:2])
+    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
+        CUDA_MAJOR, CUDA_MINOR = split_version(torch_info['cuda_version'])
+        CUDA_VERSION = (CUDA_MAJOR * 10) + CUDA_MINOR
+        if valid_cuda.count(CUDA_VERSION) == 0:
+            pytest.skip(f"requires cuda versions {valid_cuda}")
+    else:
+        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
+    return
+
+
 def required_torch_version():
     TORCH_MAJOR = int(torch.__version__.split('.')[0])
     TORCH_MINOR = int(torch.__version__.split('.')[1])
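skip_on_cuda encodes the installed toolkit version as (major * 10) + minor, so CUDA 10.1 becomes 101 and 11.0 becomes 110, and the test is skipped unless that code appears in valid_cuda. A small standalone sketch of the encoding; the helper name here is illustrative, not part of the commit:

    # Illustrative sketch of the version encoding used by skip_on_cuda:
    # (major * 10) + minor, e.g. "10.2" -> 102 and "11.1" -> 111.
    def cuda_version_code(version_string: str) -> int:
        major, minor = map(int, version_string.split('.')[:2])
        return major * 10 + minor


    assert cuda_version_code("10.1") == 101
    assert cuda_version_code("11.0") == 110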
