Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce dependencies on numba. #1761

Merged
9 commits merged on Dec 20, 2024
Previous commit
Next commit
Revert test changes (keep Numba as a hard dependency in tests).
bdice committed Dec 17, 2024
commit 6729399ee37f208bc08987e0b480b59ed19a4896
11 changes: 0 additions & 11 deletions python/rmm/rmm/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -12,23 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools

import pytest
from cuda.bindings import runtime

import rmm
import rmm.statistics


@functools.cache
def system_memory_supported():
    """Return the pageable-memory-access attribute of the current device.

    The ``cudaDevAttrPageableMemoryAccess`` attribute is looked up through
    ``rmm._cuda.gpu`` for the device reported by ``getDevice()``.  The
    result is memoized by ``functools.cache``, so the device is queried at
    most once per process.
    """
    attribute = runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess
    current_device = rmm._cuda.gpu.getDevice()
    return rmm._cuda.gpu.getDeviceAttribute(attribute, current_device)


@pytest.fixture(scope="function", autouse=True)
def rmm_auto_reinitialize():
# Run the test
118 changes: 81 additions & 37 deletions python/rmm/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
@@ -22,23 +22,30 @@

import numpy as np
import pytest
from conftest import system_memory_supported
from cuda.bindings import runtime
from numba import cuda

import rmm
import rmm._cuda.stream
from rmm.allocators.cupy import rmm_cupy_allocator
from rmm.allocators.numba import RMMNumbaManager
from rmm.pylibrmm.logger import level_enum

cuda.set_memory_manager(RMMNumbaManager)

def array_tester(dtype, nelem):
cuda = pytest.importorskip("numba.cuda")
_SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute(
runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess,
rmm._cuda.gpu.getDevice(),
)


def array_tester(dtype, nelem, alloc):
# data
h_in = np.full(nelem, 3.2, dtype)
h_result = np.empty(nelem, dtype)

d_in = cuda.to_device(h_in)
d_result = cuda.device_array_like(d_in)
d_in = alloc.to_device(h_in)
d_result = alloc.device_array_like(d_in)

d_result.copy_to_device(d_in)
h_result = d_result.copy_to_host()
@@ -56,44 +63,48 @@ def array_tester(dtype, nelem):
np.bool_,
]
_nelems = [1, 2, 7, 8, 9, 32, 128]
_allocs = [cuda]


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
def test_rmm_alloc(dtype, nelem):
array_tester(dtype, nelem)
@pytest.mark.parametrize("alloc", _allocs)
def test_rmm_alloc(dtype, nelem, alloc):
array_tester(dtype, nelem, alloc)


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"managed, pool", list(product([False, True], [False, True]))
)
def test_rmm_modes(dtype, nelem, managed, pool):
def test_rmm_modes(dtype, nelem, alloc, managed, pool):
assert rmm.is_initialized()
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)

rmm.reinitialize(pool_allocator=pool, managed_memory=managed)

assert rmm.is_initialized()

array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


@pytest.mark.skipif(
not system_memory_supported(),
not _SYSTEM_MEMORY_SUPPORTED,
reason="System memory not supported",
)
@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"system, pool, headroom",
list(product([False, True], [False, True], [False, True])),
)
def test_rmm_modes_system_memory(dtype, nelem, system, pool, headroom):
def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom):
assert rmm.is_initialized()
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)

if system:
if headroom:
@@ -110,17 +121,18 @@ def test_rmm_modes_system_memory(dtype, nelem, system, pool, headroom):

assert rmm.is_initialized()

array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
def test_rmm_csv_log(dtype, nelem, tmpdir):
@pytest.mark.parametrize("alloc", _allocs)
def test_rmm_csv_log(dtype, nelem, alloc, tmpdir):
suffix = ".csv"

base_name = str(tmpdir.join("rmm_log.csv"))
rmm.reinitialize(logging=True, log_file_name=base_name)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)
rmm.mr._flush_logs()

# Need to open separately because the device ID is appended to filename
@@ -267,8 +279,15 @@ def test_rmm_device_buffer_copy_from_host(hb):
np.testing.assert_equal(expected, result)


def test_rmm_device_buffer_copy_from_device():
cuda_ary = rmm.DeviceBuffer.to_device(b"abc")
@pytest.mark.parametrize(
"cuda_ary",
[
lambda: rmm.DeviceBuffer.to_device(b"abc"),
lambda: cuda.to_device(np.array([97, 98, 99], dtype="u1")),
],
)
def test_rmm_device_buffer_copy_from_device(cuda_ary):
cuda_ary = cuda_ary()
db = rmm.DeviceBuffer.to_device(np.zeros(10, dtype="u1"))
db.copy_from_device(cuda_ary)

@@ -325,6 +344,17 @@ def test_rmm_device_buffer_prefetch(pool, managed):
assert_prefetched(db, device)


@pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()])
def test_rmm_pool_numba_stream(stream):
    """Allocate a DeviceBuffer from a pool on a stream wrapped from Numba.

    Both the Numba default stream and a freshly created Numba stream are
    wrapped in ``rmm._cuda.stream.Stream`` and passed to the allocation.
    """
    rmm.reinitialize(pool_allocator=True)

    # Wrap the Numba stream object so RMM can consume it.
    rmm_stream = rmm._cuda.stream.Stream(stream)
    buf = rmm.pylibrmm.device_buffer.DeviceBuffer(size=3, stream=rmm_stream)

    assert buf.size == 3
    assert buf.ptr != 0


def test_rmm_cupy_allocator():
cupy = pytest.importorskip("cupy")

@@ -393,19 +423,21 @@ def test_rmm_pool_cupy_allocator_stream_lifetime():

@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
def test_pool_memory_resource(dtype, nelem):
@pytest.mark.parametrize("alloc", _allocs)
def test_pool_memory_resource(dtype, nelem, alloc):
mr = rmm.mr.PoolMemoryResource(
rmm.mr.CudaMemoryResource(),
initial_pool_size="4MiB",
maximum_pool_size="8MiB",
)
rmm.mr.set_current_device_resource(mr)
assert rmm.mr.get_current_device_resource_type() is type(mr)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"upstream",
[
@@ -417,21 +449,22 @@ def test_pool_memory_resource(dtype, nelem):
lambda: rmm.mr.SystemMemoryResource(),
lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20),
]
if system_memory_supported()
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_fixed_size_memory_resource(dtype, nelem, upstream):
def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
mr = rmm.mr.FixedSizeMemoryResource(
upstream(), block_size=1 << 20, blocks_to_preallocate=128
)
rmm.mr.set_current_device_resource(mr)
assert rmm.mr.get_current_device_resource_type() is type(mr)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"upstream_mr",
[
@@ -446,11 +479,11 @@ def test_fixed_size_memory_resource(dtype, nelem, upstream):
lambda: rmm.mr.SystemMemoryResource(),
lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20),
]
if system_memory_supported()
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_binning_memory_resource(dtype, nelem, upstream_mr):
def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
upstream = upstream_mr()

# Add fixed-size bins 256KiB, 512KiB, 1MiB, 2MiB, 4MiB
@@ -464,11 +497,12 @@ def test_binning_memory_resource(dtype, nelem, upstream_mr):

rmm.mr.set_current_device_resource(mr)
assert rmm.mr.get_current_device_resource_type() is type(mr)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
"upstream_mr",
[
@@ -479,13 +513,13 @@ def test_binning_memory_resource(dtype, nelem, upstream_mr):
),
],
)
def test_arena_memory_resource(dtype, nelem, upstream_mr):
def test_arena_memory_resource(dtype, nelem, alloc, upstream_mr):
upstream = upstream_mr()
mr = rmm.mr.ArenaMemoryResource(upstream)

rmm.mr.set_current_device_resource(mr)
assert rmm.mr.get_current_device_resource_type() is type(mr)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


def test_reinitialize_max_pool_size():
@@ -533,14 +567,15 @@ def test_reinitialize_with_invalid_str_arg_pool_size():

@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
def test_rmm_enable_disable_logging(dtype, nelem, tmpdir):
@pytest.mark.parametrize("alloc", _allocs)
def test_rmm_enable_disable_logging(dtype, nelem, alloc, tmpdir):
suffix = ".csv"

base_name = str(tmpdir.join("rmm_log.csv"))

rmm.enable_logging(log_file_name=base_name)
print(rmm.mr.get_per_device_resource(0))
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)
rmm.mr._flush_logs()

# Need to open separately because the device ID is appended to filename
@@ -619,11 +654,12 @@ def test_mr_upstream_lifetime():

@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
def test_cuda_async_memory_resource(dtype, nelem):
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_memory_resource(dtype, nelem, alloc):
mr = rmm.mr.CudaAsyncMemoryResource()
rmm.mr.set_current_device_resource(mr)
assert rmm.mr.get_current_device_resource_type() is type(mr)
array_tester(dtype, nelem)
array_tester(dtype, nelem, alloc)


def test_cuda_async_memory_resource_ipc():
@@ -667,14 +703,15 @@ def test_cuda_async_memory_resource_stream(nelems):


@pytest.mark.parametrize("nelem", _nelems)
def test_cuda_async_memory_resource_threshold(nelem):
@pytest.mark.parametrize("alloc", _allocs)
def test_cuda_async_memory_resource_threshold(nelem, alloc):
# initial pool size == 0
mr = rmm.mr.CudaAsyncMemoryResource(
initial_pool_size=0, release_threshold=nelem
)
rmm.mr.set_current_device_resource(mr)
array_tester("u1", nelem) # should not trigger release
array_tester("u1", 2 * nelem) # should trigger release
array_tester("u1", nelem, alloc) # should not trigger release
array_tester("u1", 2 * nelem, alloc) # should trigger release


@pytest.mark.parametrize(
@@ -971,11 +1008,18 @@ def func_with_arg(x):
assert L == [2]


@pytest.mark.parametrize(
"cuda_ary",
[
lambda: rmm.DeviceBuffer.to_device(b"abc"),
lambda: cuda.to_device(np.array([97, 98, 99, 0, 0], dtype="u1")),
],
)
@pytest.mark.parametrize(
"make_copy", [lambda db: db.copy(), lambda db: copy.copy(db)]
)
def test_rmm_device_buffer_copy(make_copy):
cuda_ary = rmm.DeviceBuffer.to_device(b"abc")
def test_rmm_device_buffer_copy(cuda_ary, make_copy):
cuda_ary = cuda_ary()
db = rmm.DeviceBuffer.to_device(np.zeros(5, dtype="u1"))
db.copy_from_device(cuda_ary)
db_copy = make_copy(db)
Loading