 #include <vector>

 #include "arrow/buffer.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
+#include "arrow/util/logging.h"

 #include "arrow/python/common.h"
 #include "arrow/python/pyarrow.h"
@@ -186,7 +188,9 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {

 #undef TO_ARROW_TYPE_CASE

-Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out) {
+Status NdarrayToTensor(MemoryPool* pool, PyObject* ao,
+                       const std::vector<std::string>& dim_names,
+                       std::shared_ptr<Tensor>* out) {
   if (!PyArray_Check(ao)) {
     return Status::TypeError("Did not pass ndarray object");
   }
@@ -197,35 +201,29 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>*

   int ndim = PyArray_NDIM(ndarray);

-  // This is also holding the GIL, so don't already draw it.
   std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(ao);
   std::vector<int64_t> shape(ndim);
   std::vector<int64_t> strides(ndim);

-  {
-    PyAcquireGIL lock;
-    npy_intp* array_strides = PyArray_STRIDES(ndarray);
-    npy_intp* array_shape = PyArray_SHAPE(ndarray);
-    for (int i = 0; i < ndim; ++i) {
-      if (array_strides[i] < 0) {
-        return Status::Invalid("Negative ndarray strides not supported");
-      }
-      shape[i] = array_shape[i];
-      strides[i] = array_strides[i];
+  npy_intp* array_strides = PyArray_STRIDES(ndarray);
+  npy_intp* array_shape = PyArray_SHAPE(ndarray);
+  for (int i = 0; i < ndim; ++i) {
+    if (array_strides[i] < 0) {
+      return Status::Invalid("Negative ndarray strides not supported");
     }
-
-    std::shared_ptr<DataType> type;
-    RETURN_NOT_OK(
-        GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray)), &type));
-    *out = std::make_shared<Tensor>(type, data, shape, strides);
-    return Status::OK();
+    shape[i] = array_shape[i];
+    strides[i] = array_strides[i];
   }
+
+  std::shared_ptr<DataType> type;
+  RETURN_NOT_OK(
+      GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray)), &type));
+  *out = std::make_shared<Tensor>(type, data, shape, strides, dim_names);
+  return Status::OK();
 }

 Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
                        PyObject** out) {
-  PyAcquireGIL lock;
-
   int type_num;
   RETURN_NOT_OK(GetNumPyType(*tensor->type(), &type_num));
   PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num);
@@ -274,5 +272,140 @@ Status TensorToNdarray(const std::shared_ptr<Tensor>& tensor, PyObject* base,
   return Status::OK();
 }

+// Wrap the dense data of a sparse tensor in a ndarray
+static Status SparseTensorDataToNdarray(const SparseTensor& sparse_tensor,
+                                        std::vector<npy_intp> data_shape, PyObject* base,
+                                        PyObject** out_data) {
+  int type_num_data;
+  RETURN_NOT_OK(GetNumPyType(*sparse_tensor.type(), &type_num_data));
+  PyArray_Descr* dtype_data = PyArray_DescrNewFromType(type_num_data);
+  RETURN_IF_PYERROR();
+
+  const void* immutable_data = sparse_tensor.data()->data();
+  // Remove const =(
+  void* mutable_data = const_cast<void*>(immutable_data);
+  int array_flags = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS;
+  if (sparse_tensor.is_mutable()) {
+    array_flags |= NPY_ARRAY_WRITEABLE;
+  }
+
+  *out_data = PyArray_NewFromDescr(&PyArray_Type, dtype_data,
+                                   static_cast<int>(data_shape.size()), data_shape.data(),
+                                   nullptr, mutable_data, array_flags, nullptr);
+  RETURN_IF_PYERROR();
+  Py_XINCREF(base);
+  PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(*out_data), base);
+  return Status::OK();
+}
+
+Status SparseTensorCOOToNdarray(const std::shared_ptr<SparseTensorCOO>& sparse_tensor,
+                                PyObject* base, PyObject** out_data,
+                                PyObject** out_coords) {
+  const auto& sparse_index = arrow::internal::checked_cast<const SparseCOOIndex&>(
+      *sparse_tensor->sparse_index());
+
+  // Wrap tensor data
+  OwnedRef result_data;
+  RETURN_NOT_OK(SparseTensorDataToNdarray(
+      *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref()));
+
+  // Wrap indices
+  PyObject* result_coords;
+  RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, &result_coords));
+
+  *out_data = result_data.detach();
+  *out_coords = result_coords;
+  return Status::OK();
+}
+
+Status SparseTensorCSRToNdarray(const std::shared_ptr<SparseTensorCSR>& sparse_tensor,
+                                PyObject* base, PyObject** out_data,
+                                PyObject** out_indptr, PyObject** out_indices) {
+  const auto& sparse_index = arrow::internal::checked_cast<const SparseCSRIndex&>(
+      *sparse_tensor->sparse_index());
+
+  // Wrap tensor data
+  OwnedRef result_data;
+  RETURN_NOT_OK(SparseTensorDataToNdarray(
+      *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref()));
+
+  // Wrap indices
+  OwnedRef result_indptr;
+  OwnedRef result_indices;
+  RETURN_NOT_OK(TensorToNdarray(sparse_index.indptr(), base, result_indptr.ref()));
+  RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, result_indices.ref()));
+
+  *out_data = result_data.detach();
+  *out_indptr = result_indptr.detach();
+  *out_indices = result_indices.detach();
+  return Status::OK();
+}
+
+Status NdarraysToSparseTensorCOO(MemoryPool* pool, PyObject* data_ao, PyObject* coords_ao,
+                                 const std::vector<int64_t>& shape,
+                                 const std::vector<std::string>& dim_names,
+                                 std::shared_ptr<SparseTensorCOO>* out) {
+  if (!PyArray_Check(data_ao) || !PyArray_Check(coords_ao)) {
+    return Status::TypeError("Did not pass ndarray object");
+  }
+
+  PyArrayObject* ndarray_data = reinterpret_cast<PyArrayObject*>(data_ao);
+  std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
+  std::shared_ptr<DataType> type_data;
+  RETURN_NOT_OK(GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data)),
+                              &type_data));
+
+  std::shared_ptr<Tensor> coords;
+  RETURN_NOT_OK(NdarrayToTensor(pool, coords_ao, {}, &coords));
+  ARROW_CHECK_EQ(coords->type_id(), Type::INT64);  // Should be ensured by caller
+
+  std::shared_ptr<SparseCOOIndex> sparse_index = std::make_shared<SparseCOOIndex>(
+      std::static_pointer_cast<NumericTensor<Int64Type>>(coords));
+  *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type_data, data,
+                                                            shape, dim_names);
+  return Status::OK();
+}
+
+Status NdarraysToSparseTensorCSR(MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao,
+                                 PyObject* indices_ao, const std::vector<int64_t>& shape,
+                                 const std::vector<std::string>& dim_names,
+                                 std::shared_ptr<SparseTensorCSR>* out) {
+  if (!PyArray_Check(data_ao) || !PyArray_Check(indptr_ao) ||
+      !PyArray_Check(indices_ao)) {
+    return Status::TypeError("Did not pass ndarray object");
+  }
+
+  PyArrayObject* ndarray_data = reinterpret_cast<PyArrayObject*>(data_ao);
+  std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
+  std::shared_ptr<DataType> type_data;
+  RETURN_NOT_OK(GetTensorType(reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray_data)),
+                              &type_data));
+
+  std::shared_ptr<Tensor> indptr, indices;
+  RETURN_NOT_OK(NdarrayToTensor(pool, indptr_ao, {}, &indptr));
+  RETURN_NOT_OK(NdarrayToTensor(pool, indices_ao, {}, &indices));
+  ARROW_CHECK_EQ(indptr->type_id(), Type::INT64);   // Should be ensured by caller
+  ARROW_CHECK_EQ(indices->type_id(), Type::INT64);  // Should be ensured by caller
+
+  auto sparse_index = std::make_shared<SparseCSRIndex>(
+      std::static_pointer_cast<NumericTensor<Int64Type>>(indptr),
+      std::static_pointer_cast<NumericTensor<Int64Type>>(indices));
+  *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type_data, data,
+                                                            shape, dim_names);
+  return Status::OK();
+}
+
+Status TensorToSparseTensorCOO(const std::shared_ptr<Tensor>& tensor,
+                               std::shared_ptr<SparseTensorCOO>* out) {
+  *out = std::make_shared<SparseTensorCOO>(*tensor);
+  return Status::OK();
+}
+
+Status TensorToSparseTensorCSR(const std::shared_ptr<Tensor>& tensor,
+                               std::shared_ptr<SparseTensorCSR>* out) {
+  *out = std::make_shared<SparseTensorCSR>(*tensor);
+  return Status::OK();
+}
+
 }  // namespace py
 }  // namespace arrow
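
For reference, here is a minimal usage sketch (not part of the patch) showing how the changed and added signatures might be called from embedding C++ code. It assumes the declarations live in "arrow/python/numpy_convert.h", that Python and NumPy are already initialized (import_array() has run), and that the caller holds the GIL, since NdarrayToTensor no longer acquires it itself; the function name ConvertNdarray2D and the "row"/"col" dimension names are illustrative only.

#include <memory>

#include "arrow/memory_pool.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/sparse_tensor.h"
#include "arrow/status.h"
#include "arrow/tensor.h"

arrow::Status ConvertNdarray2D(PyObject* ndarray_2d) {
  // Wrap the ndarray as an arrow::Tensor; dim_names is the newly added
  // parameter (pass {} when the dimensions are unnamed).
  std::shared_ptr<arrow::Tensor> tensor;
  ARROW_RETURN_NOT_OK(arrow::py::NdarrayToTensor(
      arrow::default_memory_pool(), ndarray_2d, {"row", "col"}, &tensor));

  // Convert the dense tensor into its sparse COO representation.
  std::shared_ptr<arrow::SparseTensorCOO> sparse;
  ARROW_RETURN_NOT_OK(arrow::py::TensorToSparseTensorCOO(tensor, &sparse));
  return arrow::Status::OK();
}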