@@ -281,36 +281,28 @@ Status SparseTensorCOOToNdarray(const std::shared_ptr<SparseTensorCOO>& sparse_t
281281 PyAcquireGIL lock;
282282
283283 int type_num_data;
284- int type_num_coords;
284+ RETURN_NOT_OK (GetNumPyType (*sparse_tensor->type (), &type_num_data));
285+ PyArray_Descr* dtype_data = PyArray_DescrNewFromType (type_num_data);
286+ RETURN_IF_PYERROR ();
287+
285288 const auto & sparse_index = arrow::internal::checked_cast<const SparseCOOIndex&>(
286289 *sparse_tensor->sparse_index ());
287290 const std::shared_ptr<arrow::NumericTensor<arrow::Int64Type>> sparse_index_coords =
288291 sparse_index.indices ();
289292
290- RETURN_NOT_OK (GetNumPyType (*sparse_tensor->type (), &type_num_data));
291- PyArray_Descr* dtype_data = PyArray_DescrNewFromType (type_num_data);
292- RETURN_NOT_OK (GetNumPyType (*sparse_index_coords->type (), &type_num_coords));
293- PyArray_Descr* dtype_coords = PyArray_DescrNewFromType (type_num_coords);
294293 RETURN_IF_PYERROR ();
295294
295+ std::vector<npy_intp> npy_shape_data ({sparse_index.non_zero_length (), 1 });
296+
296297 const int ndim_coords = sparse_tensor->ndim ();
297298 std::vector<npy_intp> npy_shape_coords (ndim_coords);
298299
299300 for (int i = 0 ; i < ndim_coords; ++i) {
300301 npy_shape_coords[i] = sparse_index_coords->shape ()[i];
301302 }
302303
303- std::vector<npy_intp> npy_shape_data ({sparse_index.non_zero_length (), 1 });
304-
305- const void * immutable_data = nullptr ;
306- if (sparse_tensor->data ()) {
307- immutable_data = sparse_tensor->data ()->data ();
308- }
309-
310- const void * immutable_coords = nullptr ;
311- if (sparse_index_coords->data ()) {
312- immutable_coords = sparse_index_coords->data ()->data ();
313- }
304+ const void * immutable_data = sparse_tensor->data ()->data ();
305+ const void * immutable_coords = sparse_index_coords->data ()->data ();
314306
315307 // Remove const =(
316308 void * mutable_data = const_cast <void *>(immutable_data);
@@ -321,20 +313,21 @@ Status SparseTensorCOOToNdarray(const std::shared_ptr<SparseTensorCOO>& sparse_t
321313 array_flags |= NPY_ARRAY_WRITEABLE;
322314 }
323315
316+ int type_num_coords;
317+ RETURN_NOT_OK (GetNumPyType (*sparse_index_coords->type (), &type_num_coords));
318+ PyArray_Descr* dtype_coords = PyArray_DescrNewFromType (type_num_coords);
319+
324320 PyObject* result_data =
325- PyArray_NewFromDescr (&PyArray_Type, dtype_data, 1 , npy_shape_data.data (), nullptr ,
321+ PyArray_NewFromDescr (&PyArray_Type, dtype_data, 2 , npy_shape_data.data (), nullptr ,
326322 mutable_data, array_flags, nullptr );
327- PyObject* result_coords =
328- PyArray_NewFromDescr (&PyArray_Type, dtype_coords, 2 , npy_shape_coords.data (),
329- nullptr , mutable_coords, array_flags, nullptr );
330-
323+ PyObject* result_coords = PyArray_NewFromDescr (&PyArray_Type, dtype_coords, ndim_coords,
324+ npy_shape_coords.data (), nullptr ,
325+ mutable_coords, array_flags, nullptr );
331326 RETURN_IF_PYERROR ()
332327
333- if (base == Py_None || base == nullptr ) {
334- base = py::wrap_sparse_tensor_coo (sparse_tensor);
335- } else {
336- Py_XINCREF (base);
337- }
328+ Py_XINCREF (base);
329+ Py_XINCREF (base);
330+
338331 PyArray_SetBaseObject (reinterpret_cast <PyArrayObject*>(result_data), base);
339332 PyArray_SetBaseObject (reinterpret_cast <PyArrayObject*>(result_coords), base);
340333
@@ -376,20 +369,9 @@ Status SparseTensorCSRToNdarray(const std::shared_ptr<SparseTensorCSR>& sparse_t
376369 npy_shape_indices[i] = sparse_index_indices->shape ()[i];
377370 }
378371
379- const void * immutable_data = nullptr ;
380- if (sparse_tensor->data ()) {
381- immutable_data = sparse_tensor->data ()->data ();
382- }
383-
384- const void * immutable_indptr = nullptr ;
385- if (sparse_index_indptr->data ()) {
386- immutable_indptr = sparse_index_indptr->data ()->data ();
387- }
388-
389- const void * immutable_indices = nullptr ;
390- if (sparse_index_indices->data ()) {
391- immutable_indices = sparse_index_indices->data ()->data ();
392- }
372+ const void * immutable_data = sparse_tensor->data ()->data ();
373+ const void * immutable_indptr = sparse_index_indptr->data ()->data ();
374+ const void * immutable_indices = sparse_index_indices->data ()->data ();
393375
394376 // Remove const =(
395377 void * mutable_data = const_cast <void *>(immutable_data);
@@ -410,21 +392,20 @@ Status SparseTensorCSRToNdarray(const std::shared_ptr<SparseTensorCSR>& sparse_t
410392 PyArray_Descr* dtype_indices = PyArray_DescrNewFromType (type_num_indices);
411393
412394 PyObject* result_data =
413- PyArray_NewFromDescr (&PyArray_Type, dtype_data, 1 , npy_shape_data.data (), nullptr ,
395+ PyArray_NewFromDescr (&PyArray_Type, dtype_data, 2 , npy_shape_data.data (), nullptr ,
414396 mutable_data, array_flags, nullptr );
415- PyObject* result_indptr =
416- PyArray_NewFromDescr (&PyArray_Type, dtype_indptr, 1 , npy_shape_indptr.data (),
417- nullptr , mutable_indptr, array_flags, nullptr );
418- PyObject* result_indices =
419- PyArray_NewFromDescr ( &PyArray_Type, dtype_indices, 1 , npy_shape_indices.data (),
420- nullptr , mutable_indices, array_flags, nullptr );
397+ PyObject* result_indptr = PyArray_NewFromDescr (&PyArray_Type, dtype_indptr, ndim_indptr,
398+ npy_shape_indptr.data (), nullptr ,
399+ mutable_indptr, array_flags, nullptr );
400+ PyObject* result_indices = PyArray_NewFromDescr (
401+ &PyArray_Type, dtype_indices, ndim_indices , npy_shape_indices.data (), nullptr ,
402+ mutable_indices, array_flags, nullptr );
421403 RETURN_IF_PYERROR ()
422404
423- if (base == Py_None || base == nullptr ) {
424- base = py::wrap_sparse_tensor_csr (sparse_tensor);
425- } else {
426- Py_XINCREF (base);
427- }
405+ Py_XINCREF (base);
406+ Py_XINCREF (base);
407+ Py_XINCREF (base);
408+
428409 PyArray_SetBaseObject (reinterpret_cast <PyArrayObject*>(result_data), base);
429410 PyArray_SetBaseObject (reinterpret_cast <PyArrayObject*>(result_indptr), base);
430411 PyArray_SetBaseObject (reinterpret_cast <PyArrayObject*>(result_indices), base);
@@ -435,19 +416,17 @@ Status SparseTensorCSRToNdarray(const std::shared_ptr<SparseTensorCSR>& sparse_t
435416 return Status::OK ();
436417}
437418
438- Status NdarrayToSparseTensorCOO (MemoryPool* pool, PyObject* ao,
439- std::shared_ptr<SparseTensorCOO>* out) {
419+ Status DenseNdarrayToSparseTensorCOO (MemoryPool* pool, PyObject* ao,
420+ const std::vector<std::string>& dim_names,
421+ std::shared_ptr<SparseTensorCOO>* out) {
440422 if (!PyArray_Check (ao)) {
441423 return Status::TypeError (" Did not pass ndarray object" );
442424 }
443425
444426 PyArrayObject* ndarray = reinterpret_cast <PyArrayObject*>(ao);
445427
446- // TODO(wesm): What do we want to do with non-contiguous memory and negative strides?
447-
448428 int ndim = PyArray_NDIM (ndarray);
449429
450- // This is also holding the GIL, so don't already draw it.
451430 std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(ao);
452431 std::vector<int64_t > shape (ndim);
453432 std::vector<int64_t > strides (ndim);
@@ -467,25 +446,23 @@ Status NdarrayToSparseTensorCOO(MemoryPool* pool, PyObject* ao,
467446 std::shared_ptr<DataType> type;
468447 RETURN_NOT_OK (
469448 GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray)), &type));
470- Tensor tensor (type, data, shape, strides);
449+ Tensor tensor (type, data, shape, strides, dim_names );
471450 *out = std::make_shared<SparseTensorCOO>(tensor);
472451 return Status::OK ();
473452 }
474453}
475454
476- Status NdarrayToSparseTensorCSR (MemoryPool* pool, PyObject* ao,
477- std::shared_ptr<SparseTensorCSR>* out) {
455+ Status DenseNdarrayToSparseTensorCSR (MemoryPool* pool, PyObject* ao,
456+ const std::vector<std::string>& dim_names,
457+ std::shared_ptr<SparseTensorCSR>* out) {
478458 if (!PyArray_Check (ao)) {
479459 return Status::TypeError (" Did not pass ndarray object" );
480460 }
481461
482462 PyArrayObject* ndarray = reinterpret_cast <PyArrayObject*>(ao);
483463
484- // TODO(wesm): What do we want to do with non-contiguous memory and negative strides?
485-
486464 int ndim = PyArray_NDIM (ndarray);
487465
488- // This is also holding the GIL, so don't already draw it.
489466 std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(ao);
490467 std::vector<int64_t > shape (ndim);
491468 std::vector<int64_t > strides (ndim);
@@ -505,7 +482,7 @@ Status NdarrayToSparseTensorCSR(MemoryPool* pool, PyObject* ao,
505482 std::shared_ptr<DataType> type;
506483 RETURN_NOT_OK (
507484 GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray)), &type));
508- Tensor tensor (type, data, shape, strides);
485+ Tensor tensor (type, data, shape, strides, dim_names );
509486 *out = std::make_shared<SparseTensorCSR>(tensor);
510487 return Status::OK ();
511488 }
@@ -529,22 +506,34 @@ Status NdarraysToSparseTensorCOO(MemoryPool* pool, PyObject* data_ao, PyObject*
529506
530507 int coords_ndim = PyArray_NDIM (ndarray_coords);
531508
532- std::shared_ptr<DataType> type;
509+ std::shared_ptr<DataType> type_data;
510+ std::shared_ptr<DataType> type_coords;
511+ RETURN_NOT_OK (GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_data)),
512+ &type_data));
513+ RETURN_NOT_OK (GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_coords)),
514+ &type_coords));
515+ ARROW_CHECK_EQ (type_coords->id (), Type::INT64);
516+
517+ const int64_t i64_size = sizeof (int64_t );
533518 npy_intp* coords_array_shape = PyArray_SHAPE (ndarray_coords);
534519 std::vector<int64_t > coords_shape (coords_ndim);
520+ std::vector<int64_t > coords_strides (coords_ndim);
535521
536522 for (int i = 0 ; i < coords_ndim; ++i) {
537523 coords_shape[i] = coords_array_shape[i];
524+ if (i == 0 ) {
525+ coords_strides[i] = i64_size;
526+ } else {
527+ coords_strides[i] = coords_strides[i - 1 ] * coords_array_shape[i - 1 ];
528+ }
538529 }
539530
540531 std::shared_ptr<NumericTensor<Int64Type>> coords =
541- std::make_shared<NumericTensor<Int64Type>>(coords_buffer, coords_shape);
542-
543- RETURN_NOT_OK (
544- GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_data)), &type));
532+ std::make_shared<NumericTensor<Int64Type>>(coords_buffer, coords_shape,
533+ coords_strides);
545534
546535 std::shared_ptr<SparseCOOIndex> sparse_index = std::make_shared<SparseCOOIndex>(coords);
547- *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type , data,
536+ *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type_data , data,
548537 shape, dim_names);
549538 return Status::OK ();
550539}
@@ -564,15 +553,16 @@ Status NdarraysToSparseTensorCSR(MemoryPool* pool, PyObject* data_ao, PyObject*
564553 PyArrayObject* ndarray_indptr = reinterpret_cast <PyArrayObject*>(indptr_ao);
565554 PyArrayObject* ndarray_indices = reinterpret_cast <PyArrayObject*>(indices_ao);
566555
567- // This is also holding the GIL, so don't already draw it.
568556 std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(data_ao);
569557 std::shared_ptr<Buffer> indptr_buffer = std::make_shared<NumPyBuffer>(indptr_ao);
570558 std::shared_ptr<Buffer> indices_buffer = std::make_shared<NumPyBuffer>(indices_ao);
571559
572560 int indptr_ndim = PyArray_NDIM (ndarray_indptr);
573561 int indices_ndim = PyArray_NDIM (ndarray_indices);
574562
575- std::shared_ptr<DataType> type;
563+ std::shared_ptr<DataType> type_data;
564+ std::shared_ptr<DataType> type_indptr;
565+ std::shared_ptr<DataType> type_indices;
576566 npy_intp* indptr_array_shape = PyArray_SHAPE (ndarray_indptr);
577567 npy_intp* indices_array_shape = PyArray_SHAPE (ndarray_indices);
578568 std::vector<int64_t > indptr_shape (indptr_ndim);
@@ -585,17 +575,23 @@ Status NdarraysToSparseTensorCSR(MemoryPool* pool, PyObject* data_ao, PyObject*
585575 indices_shape[i] = indices_array_shape[i];
586576 }
587577
578+ RETURN_NOT_OK (GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_data)),
579+ &type_data));
580+ RETURN_NOT_OK (GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_indptr)),
581+ &type_indptr));
582+ RETURN_NOT_OK (GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_indices)),
583+ &type_indices));
584+ ARROW_CHECK_EQ (type_indptr->id (), Type::INT64);
585+ ARROW_CHECK_EQ (type_indices->id (), Type::INT64);
586+
588587 std::shared_ptr<SparseCSRIndex::IndexTensor> indptr =
589588 std::make_shared<SparseCSRIndex::IndexTensor>(indptr_buffer, indptr_shape);
590589 std::shared_ptr<SparseCSRIndex::IndexTensor> indices =
591590 std::make_shared<SparseCSRIndex::IndexTensor>(indices_buffer, indices_shape);
592591
593- RETURN_NOT_OK (
594- GetTensorType (reinterpret_cast <PyObject*>(PyArray_DESCR (ndarray_data)), &type));
595-
596592 std::shared_ptr<SparseCSRIndex> sparse_index =
597593 std::make_shared<SparseCSRIndex>(indptr, indices);
598- *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type , data,
594+ *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type_data , data,
599595 shape, dim_names);
600596 return Status::OK ();
601597}
0 commit comments