Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adaptive stress tests when GPU memory capacity is insufficient #3916

Merged
merged 7 commits into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions python/cuml/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@

import pytest
import os
import subprocess

import numpy as np
import cupy as cp

from math import ceil
from sklearn.datasets import fetch_20newsgroups
from sklearn.datasets import fetch_california_housing
from sklearn.datasets import make_classification as skl_make_clas
Expand All @@ -30,6 +32,9 @@

def pytest_configure(config):
    """Pytest hook: set up session-wide GPU attributes.

    Disables the cupy memory-pool allocator and records two globals on the
    ``pytest`` module for use by memory-hungry stress tests:

    * ``pytest.max_gpu_memory`` — capacity of the GPU memory in GB.
    * ``pytest.adapt_stress_test`` — True when the
      ``CUML_ADAPT_STRESS_TESTS`` environment variable is set.
    """
    cp.cuda.set_allocator(None)
    pytest.max_gpu_memory = get_gpu_memory()
    pytest.adapt_stress_test = os.environ.get('CUML_ADAPT_STRESS_TESTS') is not None


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -146,3 +151,20 @@ def exact_shap_classification_dataset():
test_size=3,
random_state_generator=42,
random_state_train_test_split=42)


def get_gpu_memory():
    """Return the memory capacity of the largest visible GPU, in GB.

    Queries ``nvidia-smi`` for ``memory.total`` of every GPU, parses the
    MiB values from the CSV output, and returns the largest one converted
    to GB (rounded up with ``ceil``).

    Returns
    -------
    int
        Capacity of the largest GPU in GB.

    Raises
    ------
    RuntimeError
        If no GPU memory value could be parsed from the nvidia-smi output.
    subprocess.CalledProcessError
        If ``nvidia-smi`` exits with a non-zero status.
    """
    # Pass the command as an argument list with the default shell=False:
    # no shell interpretation is needed for a fixed command.
    output = subprocess.check_output(
        ["nvidia-smi", "--query-gpu=memory.total", "--format=csv"]
    ).decode("utf-8")
    # First line is the CSV header (e.g. "memory.total [MiB]"); skip it.
    lines = output.split("\n")[1:]
    gpus_memory = []
    for line in lines:
        # Data lines look like "16384 MiB"; the first token is the value.
        tokens = line.split(" ")
        if len(tokens) > 1:
            gpus_memory.append(int(tokens[0]))
    if not gpus_memory:
        # Fail loudly with a clear message instead of an IndexError.
        raise RuntimeError("Could not parse GPU memory from nvidia-smi output")
    # nvidia-smi reports MiB; divide by 1024 and round up to whole GB.
    return ceil(max(gpus_memory) / 1024)
10 changes: 9 additions & 1 deletion python/cuml/test/dask/test_tsvd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,14 @@
def test_pca_fit(data_info, input_type, client):

nrows, ncols, n_parts = data_info
if nrows == int(9e6) and pytest.max_gpu_memory < 48:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 256
ncols = ncols * pytest.max_gpu_memory // 256
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

from cuml.dask.decomposition import TruncatedSVD as daskTPCA
from sklearn.decomposition import TruncatedSVD

Expand Down
14 changes: 14 additions & 0 deletions python/cuml/test/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@
stress_param("int32")])
def test_dbscan(datatype, use_handle, nrows, ncols,
max_mbytes_per_batch, out_dtype):
if nrows == 500000 and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test. "
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

n_samples = nrows
n_feats = ncols
X, y = make_blobs(n_samples=n_samples, cluster_std=0.01,
Expand Down Expand Up @@ -117,6 +124,13 @@ def test_dbscan_precomputed(datatype, nrows, max_mbytes_per_batch, out_dtype):
# Vary the eps to get a range of core point counts
@pytest.mark.parametrize('eps', [0.05, 0.1, 0.5])
def test_dbscan_sklearn_comparison(name, nrows, eps):
if nrows == 500000 and name == 'blobs' and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

default_base = {'quantile': .2,
'eps': eps,
'damping': .9,
Expand Down
10 changes: 8 additions & 2 deletions python/cuml/test/test_lars.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,7 @@
array_equal,
unit_param,
quality_param,
stress_param,
stress_param
)

from sklearn.datasets import load_boston
Expand Down Expand Up @@ -115,6 +115,12 @@ def test_lars_model(datatype, nrows, column_info, precompute, normalize):
@pytest.mark.parametrize("precompute", [True, False])
def test_lars_collinear(datatype, nrows, column_info, precompute):
ncols, n_info = column_info
if nrows == 500000 and ncols == 1000 and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

X_train, X_test, y_train, y_test = make_regression_dataset(
datatype, nrows, ncols, n_info
Expand Down
9 changes: 8 additions & 1 deletion python/cuml/test/test_mbsgd_regressor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,6 +37,13 @@
'500000-1000-500-f32', '500000-1000-500-f64'])
def make_dataset(request):
nrows, ncols, n_info, datatype = request.param
if nrows == 500000 and datatype == np.float64 and \
pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")
X, y = make_regression(n_samples=nrows, n_informative=n_info,
n_features=ncols, random_state=0)
X = cp.array(X).astype(datatype)
Expand Down
25 changes: 23 additions & 2 deletions python/cuml/test/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,16 @@ def test_pca_defaults(n_samples, n_features, sparse):
stress_param('blobs')])
def test_pca_fit_then_transform(datatype, input_type,
name, use_handle):
blobs_n_samples = 500000
if name == 'blobs' and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

if name == 'blobs':
X, y = make_blobs(n_samples=500000,
X, y = make_blobs(n_samples=blobs_n_samples,
n_features=1000, random_state=0)

elif name == 'iris':
Expand Down Expand Up @@ -154,9 +161,17 @@ def test_pca_fit_then_transform(datatype, input_type,
stress_param('blobs')])
def test_pca_fit_transform(datatype, input_type,
name, use_handle):
blobs_n_samples = 500000

if name == 'blobs' and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

if name == 'blobs':
X, y = make_blobs(n_samples=500000,
X, y = make_blobs(n_samples=blobs_n_samples,
n_features=1000, random_state=0)

elif name == 'iris':
Expand Down Expand Up @@ -223,6 +238,12 @@ def test_pca_inverse_transform(datatype, input_type,
@pytest.mark.parametrize('return_sparse', [True, False])
@pytest.mark.parametrize('cupy_input', [True, False])
def test_sparse_pca_inputs(nrows, ncols, whiten, return_sparse, cupy_input):
if ncols == 20000 and pytest.max_gpu_memory < 48:
if pytest.adapt_stress_test:
ncols = int(ncols * pytest.max_gpu_memory / 48)
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

if return_sparse:
pytest.skip("Loss of information in converting to cupy sparse csr")
Expand Down
31 changes: 31 additions & 0 deletions python/cuml/test/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ def assert_model(pickled_model, X_test):
stress_param([500000, 1000, 500])])
@pytest.mark.parametrize('fit_intercept', [True, False])
def test_regressor_pickle(tmpdir, datatype, keys, data_size, fit_intercept):
if data_size[0] == 500000 and datatype == np.float64 and \
("LogisticRegression" in keys or "Ridge" in keys) and \
pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
data_size[0] = data_size[0] * pytest.max_gpu_memory // 640
data_size[1] = data_size[1] * pytest.max_gpu_memory // 640
data_size[2] = data_size[2] * pytest.max_gpu_memory // 640
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")
result = {}

def create_mod():
Expand Down Expand Up @@ -384,6 +394,14 @@ def test_unfit_clone(model_name):
@pytest.mark.parametrize('data_info', [unit_param([500, 20, 10, 5]),
stress_param([500000, 1000, 500, 50])])
def test_neighbors_pickle(tmpdir, datatype, keys, data_info):
if data_info[0] == 500000 and pytest.max_gpu_memory < 32 and \
("KNeighborsClassifier" in keys or "KNeighborsRegressor" in keys):
if pytest.adapt_stress_test:
data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")

result = {}

def create_mod():
Expand Down Expand Up @@ -414,6 +432,13 @@ def assert_model(pickled_model, X_test):
50])])
@pytest.mark.parametrize('keys', k_neighbors_models.keys())
def test_k_neighbors_classifier_pickle(tmpdir, datatype, data_info, keys):
if data_info[0] == 500000 and "NearestNeighbors" in keys and \
pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")
result = {}

def create_mod():
Expand Down Expand Up @@ -476,6 +501,12 @@ def assert_model(loaded_model, X):
@pytest.mark.parametrize('data_size', [unit_param([500, 20, 10]),
stress_param([500000, 1000, 500])])
def test_dbscan_pickle(tmpdir, datatype, keys, data_size):
if data_size[0] == 500000 and pytest.max_gpu_memory < 32:
if pytest.adapt_stress_test:
data_size[0] = data_size[0] * pytest.max_gpu_memory // 32
else:
pytest.skip("Insufficient GPU memory for this test."
"Re-run with 'CUML_ADAPT_STRESS_TESTS=True'")
result = {}

def create_mod():
Expand Down