Skip to content

Commit d74f38e

Browse files
authored
Merge branch 'release/25.12' into java/rmm-memory-pool-functions
2 parents 6dd2fa7 + 2f424b1 commit d74f38e

File tree

5 files changed

+246
-70
lines changed

5 files changed

+246
-70
lines changed

.github/workflows/pr.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ jobs:
187187
with:
188188
build_type: pull-request
189189
arch: "amd64"
190-
date: ${{ inputs.date }}_c
190+
date: ${{ inputs.date }}
191191
container_image: "rapidsai/ci-wheel:25.12-cuda${{ matrix.cuda_version }}-rockylinux8-py3.10"
192192
node_type: "cpu16"
193193
# requires_license_builder: false
@@ -210,7 +210,7 @@ jobs:
210210
build_type: pull-request
211211
node_type: "gpu-l4-latest-1"
212212
arch: "amd64"
213-
date: ${{ inputs.date }}_c
213+
date: ${{ inputs.date }}
214214
container_image: "rapidsai/ci-wheel:25.12-cuda${{ matrix.cuda_version }}-rockylinux8-py3.10"
215215
script: "ci/test_standalone_c.sh"
216216
sha: ${{ inputs.sha }}

c/src/core/c_api.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ extern "C" cuvsError_t cuvsRMMPoolMemoryResourceEnable(int initial_pool_size_per
185185
extern "C" cuvsError_t cuvsRMMMemoryResourceReset()
186186
{
187187
return cuvs::core::translate_exceptions([=] {
188-
rmm::mr::set_current_device_resource(nullptr);
188+
rmm::mr::set_current_device_resource(rmm::mr::detail::initial_resource());
189189
pool_mr.reset();
190190
});
191191
}

cpp/src/cluster/detail/single_linkage.cuh

Lines changed: 74 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -61,73 +61,80 @@ void build_mr_linkage(
6161
size_t n = X.extent(1);
6262
auto stream = raft::resource::get_cuda_stream(handle);
6363

64-
auto mr_indptr = raft::make_device_vector<value_idx, value_idx>(handle, m + 1);
65-
raft::sparse::COO<value_t, value_idx, nnz_t> mr_coo(stream, min_samples * m * 2);
66-
67-
auto inds = raft::make_device_matrix<value_idx, value_idx>(handle, m, min_samples);
68-
auto dists = raft::make_device_matrix<value_t, value_idx>(handle, m, min_samples);
69-
70-
if (all_neighbors_p.metric != metric) {
71-
RAFT_LOG_WARN("Setting all neighbors metric to given metrix for build_mr_linkage");
72-
all_neighbors_p.metric = metric;
73-
}
74-
cuvs::neighbors::all_neighbors::build(
75-
handle, all_neighbors_p, X, inds.view(), dists.view(), core_dists, alpha);
76-
77-
// self-loops get max distance
78-
auto coo_rows = raft::make_device_vector<value_idx, value_idx>(handle, min_samples * m);
79-
raft::linalg::map_offset(handle, coo_rows.view(), raft::div_const_op<value_idx>(min_samples));
80-
81-
raft::sparse::linalg::symmetrize(handle,
82-
coo_rows.data_handle(),
83-
inds.data_handle(),
84-
dists.data_handle(),
85-
static_cast<value_idx>(m),
86-
static_cast<value_idx>(m),
87-
static_cast<nnz_t>(min_samples * m),
88-
mr_coo);
89-
90-
raft::sparse::convert::sorted_coo_to_csr(
91-
mr_coo.rows(), mr_coo.nnz, mr_indptr.data_handle(), m + 1, stream);
92-
93-
auto rows_view = raft::make_device_vector_view<const value_idx, nnz_t>(mr_coo.rows(), mr_coo.nnz);
94-
auto cols_view = raft::make_device_vector_view<const value_idx, nnz_t>(mr_coo.cols(), mr_coo.nnz);
95-
auto vals_in_view =
96-
raft::make_device_vector_view<const value_t, nnz_t>(mr_coo.vals(), mr_coo.nnz);
97-
auto vals_out_view = raft::make_device_vector_view<value_t, nnz_t>(mr_coo.vals(), mr_coo.nnz);
98-
99-
raft::linalg::map(
100-
handle,
101-
vals_out_view,
102-
[=] __device__(const value_idx row, const value_idx col, const value_t val) {
103-
return row == col ? std::numeric_limits<value_t>::max() : val;
104-
},
105-
rows_view,
106-
cols_view,
107-
vals_in_view);
108-
109-
rmm::device_uvector<value_idx> color(m, raft::resource::get_cuda_stream(handle));
110-
cuvs::sparse::neighbors::MutualReachabilityFixConnectivitiesRedOp<value_idx, value_t>
111-
reduction_op(core_dists.data_handle(), m);
112-
113-
size_t nnz = m * min_samples;
114-
115-
detail::build_sorted_mst<value_idx, value_t>(handle,
116-
X.data_handle(),
117-
mr_indptr.data_handle(),
118-
mr_coo.cols(),
119-
mr_coo.vals(),
120-
m,
121-
n,
122-
out_mst.structure_view().get_rows().data(),
123-
out_mst.structure_view().get_cols().data(),
124-
out_mst.get_elements().data(),
125-
color.data(),
126-
mr_coo.nnz,
127-
reduction_op,
128-
metric,
129-
10);
130-
64+
{ // scope to drop mr_coo and mr_indptr early
65+
std::optional<raft::sparse::COO<value_t, value_idx, nnz_t>> mr_coo;
66+
67+
{ // scope to drop inds and dists matrices early
68+
auto inds = raft::make_device_matrix<value_idx, value_idx>(handle, m, min_samples);
69+
auto dists = raft::make_device_matrix<value_t, value_idx>(handle, m, min_samples);
70+
71+
if (all_neighbors_p.metric != metric) {
72+
RAFT_LOG_WARN("Setting all neighbors metric to given metrix for build_mr_linkage");
73+
all_neighbors_p.metric = metric;
74+
}
75+
cuvs::neighbors::all_neighbors::build(
76+
handle, all_neighbors_p, X, inds.view(), dists.view(), core_dists, alpha);
77+
78+
// allocate memory after all neighbors build
79+
mr_coo.emplace(stream, min_samples * m * 2);
80+
// self-loops get max distance
81+
auto coo_rows = raft::make_device_vector<value_idx, value_idx>(handle, min_samples * m);
82+
raft::linalg::map_offset(handle, coo_rows.view(), raft::div_const_op<value_idx>(min_samples));
83+
84+
raft::sparse::linalg::symmetrize(handle,
85+
coo_rows.data_handle(),
86+
inds.data_handle(),
87+
dists.data_handle(),
88+
static_cast<value_idx>(m),
89+
static_cast<value_idx>(m),
90+
static_cast<nnz_t>(min_samples * m),
91+
mr_coo.value());
92+
} // scope to drop inds and dists matrices early
93+
auto mr_indptr = raft::make_device_vector<value_idx, value_idx>(handle, m + 1);
94+
raft::sparse::convert::sorted_coo_to_csr(
95+
mr_coo.value().rows(), mr_coo.value().nnz, mr_indptr.data_handle(), m + 1, stream);
96+
97+
auto rows_view = raft::make_device_vector_view<const value_idx, nnz_t>(mr_coo.value().rows(),
98+
mr_coo.value().nnz);
99+
auto cols_view = raft::make_device_vector_view<const value_idx, nnz_t>(mr_coo.value().cols(),
100+
mr_coo.value().nnz);
101+
auto vals_in_view = raft::make_device_vector_view<const value_t, nnz_t>(mr_coo.value().vals(),
102+
mr_coo.value().nnz);
103+
auto vals_out_view =
104+
raft::make_device_vector_view<value_t, nnz_t>(mr_coo.value().vals(), mr_coo.value().nnz);
105+
106+
raft::linalg::map(
107+
handle,
108+
vals_out_view,
109+
[=] __device__(const value_idx row, const value_idx col, const value_t val) {
110+
return row == col ? std::numeric_limits<value_t>::max() : val;
111+
},
112+
rows_view,
113+
cols_view,
114+
vals_in_view);
115+
116+
rmm::device_uvector<value_idx> color(m, raft::resource::get_cuda_stream(handle));
117+
cuvs::sparse::neighbors::MutualReachabilityFixConnectivitiesRedOp<value_idx, value_t>
118+
reduction_op(core_dists.data_handle(), m);
119+
120+
size_t nnz = m * min_samples;
121+
122+
detail::build_sorted_mst<value_idx, value_t>(handle,
123+
X.data_handle(),
124+
mr_indptr.data_handle(),
125+
mr_coo.value().cols(),
126+
mr_coo.value().vals(),
127+
m,
128+
n,
129+
out_mst.structure_view().get_rows().data(),
130+
out_mst.structure_view().get_cols().data(),
131+
out_mst.get_elements().data(),
132+
color.data(),
133+
mr_coo.value().nnz,
134+
reduction_op,
135+
metric,
136+
10);
137+
} // scope to drop mr_coo and mr_indptr early
131138
/**
132139
* Perform hierarchical labeling
133140
*/

examples/c/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ target_link_libraries(IVF_FLAT_C_EXAMPLE PRIVATE cuvs::c_api $<TARGET_NAME_IF_EX
4545
add_executable(IVF_PQ_C_EXAMPLE src/ivf_pq_c_example.c)
4646
target_include_directories(IVF_PQ_C_EXAMPLE PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")
4747
target_link_libraries(IVF_PQ_C_EXAMPLE PRIVATE cuvs::c_api $<TARGET_NAME_IF_EXISTS:conda_env>)
48+
49+
add_executable(BRUTEFORCE_C_EXAMPLE src/bruteforce_c_example.c)
50+
target_include_directories(BRUTEFORCE_C_EXAMPLE PUBLIC "$<BUILD_INTERFACE:${DLPACK_INCLUDE_DIR}>")
51+
target_link_libraries(BRUTEFORCE_C_EXAMPLE PRIVATE cuvs::c_api $<TARGET_NAME_IF_EXISTS:conda_env>)
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
#include <cuvs/neighbors/brute_force.h>
7+
#include <stdint.h>
8+
9+
static const char dataset[] = {0.74021935f,
10+
0.9209938f,
11+
0.03902049f,
12+
0.9689629f,
13+
0.92514056f,
14+
0.4463501f,
15+
0.6673192f,
16+
0.10993068f};
17+
18+
static const char queries[] = {0.48216683f,
19+
0.0428398f,
20+
0.5084142f,
21+
0.6545497f,
22+
0.51260436f,
23+
0.2643005f,
24+
0.05198065f,
25+
0.5789965f};
26+
27+
void index_and_search()
28+
{
29+
int64_t n_rows = 4;
30+
int64_t n_queries = 4;
31+
int64_t n_dim = 2;
32+
uint32_t n_neighbors = 2;
33+
34+
float* index_data;
35+
float* query_data;
36+
37+
long indexBytes = sizeof(float) * n_rows * n_dim;
38+
long queriesBytes = sizeof(float) * n_queries * n_dim;
39+
long neighborsBytes = sizeof(long) * n_queries * n_neighbors;
40+
long distanceBytes = sizeof(float) * n_queries * n_neighbors;
41+
42+
uint32_t* prefilter_data = NULL;
43+
enum cuvsFilterType prefilter_type = NO_FILTER;
44+
45+
float* distances_data;
46+
int64_t* neighbors_data;
47+
48+
// create cuvsResources_t
49+
cuvsResources_t res;
50+
cuvsResourcesCreate(&res);
51+
52+
cuvsRMMAlloc(res, (void**)&index_data, indexBytes);
53+
cuvsRMMAlloc(res, (void**)&query_data, queriesBytes);
54+
cuvsRMMAlloc(res, (void**)&distances_data, distanceBytes);
55+
cuvsRMMAlloc(res, (void**)&neighbors_data, neighborsBytes);
56+
57+
cudaMemcpy(index_data, dataset, indexBytes, cudaMemcpyHostToDevice);
58+
cudaMemcpy(query_data, queries, queriesBytes, cudaMemcpyHostToDevice);
59+
60+
// create dataset DLTensor
61+
DLManagedTensor dataset_tensor;
62+
dataset_tensor.dl_tensor.data = index_data;
63+
dataset_tensor.dl_tensor.device.device_type = kDLCUDA;
64+
dataset_tensor.dl_tensor.ndim = 2;
65+
dataset_tensor.dl_tensor.dtype.code = kDLFloat;
66+
dataset_tensor.dl_tensor.dtype.bits = 32;
67+
dataset_tensor.dl_tensor.dtype.lanes = 1;
68+
int64_t dataset_shape[2] = {n_rows, n_dim};
69+
dataset_tensor.dl_tensor.shape = dataset_shape;
70+
dataset_tensor.dl_tensor.strides = NULL;
71+
72+
// create index
73+
cuvsBruteForceIndex_t index;
74+
cuvsBruteForceIndexCreate(&index);
75+
76+
// build index
77+
cuvsBruteForceBuild(res, &dataset_tensor, 0, 0.0f, index);
78+
79+
// create queries DLTensor
80+
DLManagedTensor queries_tensor;
81+
queries_tensor.dl_tensor.data = (void*)query_data;
82+
queries_tensor.dl_tensor.device.device_type = kDLCUDA;
83+
queries_tensor.dl_tensor.ndim = 2;
84+
queries_tensor.dl_tensor.dtype.code = kDLFloat;
85+
queries_tensor.dl_tensor.dtype.bits = 32;
86+
queries_tensor.dl_tensor.dtype.lanes = 1;
87+
int64_t queries_shape[2] = {n_queries, n_dim};
88+
queries_tensor.dl_tensor.shape = queries_shape;
89+
queries_tensor.dl_tensor.strides = NULL;
90+
91+
// create neighbors DLTensor
92+
DLManagedTensor neighbors_tensor;
93+
neighbors_tensor.dl_tensor.data = (void*)neighbors_data;
94+
neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
95+
neighbors_tensor.dl_tensor.ndim = 2;
96+
neighbors_tensor.dl_tensor.dtype.code = kDLInt;
97+
neighbors_tensor.dl_tensor.dtype.bits = 64;
98+
neighbors_tensor.dl_tensor.dtype.lanes = 1;
99+
int64_t neighbors_shape[2] = {n_queries, n_neighbors};
100+
neighbors_tensor.dl_tensor.shape = neighbors_shape;
101+
neighbors_tensor.dl_tensor.strides = NULL;
102+
103+
// create distances DLTensor
104+
DLManagedTensor distances_tensor;
105+
distances_tensor.dl_tensor.data = (void*)distances_data;
106+
distances_tensor.dl_tensor.device.device_type = kDLCUDA;
107+
distances_tensor.dl_tensor.ndim = 2;
108+
distances_tensor.dl_tensor.dtype.code = kDLFloat;
109+
distances_tensor.dl_tensor.dtype.bits = 32;
110+
distances_tensor.dl_tensor.dtype.lanes = 1;
111+
int64_t distances_shape[2] = {n_queries, n_neighbors};
112+
distances_tensor.dl_tensor.shape = distances_shape;
113+
distances_tensor.dl_tensor.strides = NULL;
114+
115+
cuvsFilter prefilter;
116+
117+
DLManagedTensor prefilter_tensor;
118+
if (prefilter_data == NULL || prefilter_type == NO_FILTER) {
119+
prefilter.type = NO_FILTER;
120+
prefilter.addr = (uintptr_t)NULL;
121+
} else {
122+
prefilter_tensor.dl_tensor.data = (void*)prefilter_data;
123+
prefilter_tensor.dl_tensor.device.device_type = kDLCUDA;
124+
prefilter_tensor.dl_tensor.ndim = 1;
125+
prefilter_tensor.dl_tensor.dtype.code = kDLUInt;
126+
prefilter_tensor.dl_tensor.dtype.bits = 32;
127+
prefilter_tensor.dl_tensor.dtype.lanes = 1;
128+
129+
int64_t prefilter_bits_num = (prefilter_type == BITMAP) ? n_queries * n_rows : n_rows;
130+
int64_t prefilter_shape[1] = {(prefilter_bits_num + 31) / 32};
131+
132+
prefilter_tensor.dl_tensor.shape = prefilter_shape;
133+
prefilter_tensor.dl_tensor.strides = NULL;
134+
135+
prefilter.type = prefilter_type;
136+
prefilter.addr = (uintptr_t)&prefilter_tensor;
137+
}
138+
139+
// search index
140+
cuvsBruteForceSearch(
141+
res, index, &queries_tensor, &neighbors_tensor, &distances_tensor, prefilter);
142+
143+
// de-allocate index and res
144+
cuvsBruteForceIndexDestroy(index);
145+
146+
cuvsRMMFree(res, index_data, indexBytes);
147+
cuvsRMMFree(res, query_data, queriesBytes);
148+
cuvsRMMFree(res, distances_data, distanceBytes);
149+
cuvsRMMFree(res, neighbors_data, neighborsBytes);
150+
151+
cuvsResourcesDestroy(res);
152+
}
153+
154+
int main()
155+
{
156+
// Perform indexing and search with pooled resources
157+
cuvsRMMPoolMemoryResourceEnable(10, 60, false);
158+
index_and_search();
159+
160+
// Perform indexing and search with the default memory resources
161+
cuvsRMMMemoryResourceReset();
162+
index_and_search();
163+
164+
return 0;
165+
}

0 commit comments

Comments
 (0)