Skip to content

Commit 4d7d2b0

Browse files
committed
Implementing review feedback.
1 parent 22e864d commit 4d7d2b0

File tree

5 files changed

+33
-26
lines changed

5 files changed

+33
-26
lines changed

cpp/src/arrow/python/deserialize.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,15 +187,15 @@ Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type,
187187
case PythonType::SPARSECOOTENSOR: {
188188
int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
189189
const std::shared_ptr<SparseCOOTensor>& sparse_coo_tensor =
190-
reinterpret_cast<const std::shared_ptr<SparseCOOTensor>&>(
190+
arrow::internal::checked_pointer_cast<SparseCOOTensor>(
191191
blobs.sparse_tensors[ref]);
192192
*result = wrap_sparse_coo_tensor(sparse_coo_tensor);
193193
return Status::OK();
194194
}
195195
case PythonType::SPARSECSRMATRIX: {
196196
int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
197197
const std::shared_ptr<SparseCSRMatrix>& sparse_csr_matrix =
198-
reinterpret_cast<const std::shared_ptr<SparseCSRMatrix>&>(
198+
arrow::internal::checked_pointer_cast<SparseCSRMatrix>(
199199
blobs.sparse_tensors[ref]);
200200
*result = wrap_sparse_csr_matrix(sparse_csr_matrix);
201201
return Status::OK();
@@ -389,6 +389,7 @@ Status GetSerializedFromComponents(int num_tensors,
389389
}
390390

391391
auto GetBuffer = [&data](Py_ssize_t index, std::shared_ptr<Buffer>* out) {
392+
ARROW_CHECK_LE(index, PyList_Size(data));
392393
PyObject* py_buf = PyList_GET_ITEM(data, index);
393394
return unwrap_buffer(py_buf, out);
394395
};

cpp/src/arrow/python/deserialize.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out);
6262
/// \param[in] num_ndarrays number of numpy Ndarrays in the object
6363
/// \param[in] num_buffers number of buffers in the object
6464
/// \param[in] data a list containing pyarrow.Buffer instances. Must be 1 +
65-
/// num_tensors * 2 + num_sparse_tensors * 2 + num_buffers in length
65+
/// num_tensors * 2 + num_coo_tensors * 3 + num_csr_tensors * 4 + num_buffers in length
6666
/// \param[out] out the reconstructed object
6767
/// \return Status
6868
ARROW_PYTHON_EXPORT

cpp/src/arrow/python/serialize.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,9 @@ struct PythonType {
140140
TENSOR,
141141
NDARRAY,
142142
BUFFER,
143-
NUM_PYTHON_TYPES,
144143
SPARSECOOTENSOR,
145-
SPARSECSRMATRIX
144+
SPARSECSRMATRIX,
145+
NUM_PYTHON_TYPES
146146
};
147147
};
148148

python/pyarrow/tests/test_serialization.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,9 @@ def assert_equal(obj1, obj2):
152152
]
153153

154154

155-
# index_types = ('i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8')
156-
# tensor_types = ('i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8',
157-
# 'f2', 'f4', 'f8')
158-
index_types = ('i8',)
159-
tensor_types = ('i8',)
155+
index_types = ('i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8')
156+
tensor_types = ('i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8',
157+
'f2', 'f4', 'f8')
160158

161159

162160
if sys.version_info >= (3, 0):
@@ -544,10 +542,10 @@ def deserializer(data):
544542
def test_sparse_coo_tensor_serialization(index_type, tensor_type):
545543
tensor_dtype = np.dtype(tensor_type)
546544
index_dtype = np.dtype(index_type)
547-
data = np.array([[1, 2, 3, 4, 5, 6, 7]]).T.astype(tensor_dtype)
545+
data = np.array([[1, 2, 3, 4, 5, 6]]).T.astype(tensor_dtype)
548546
coords = np.array([
549-
[0, 0, 2, 3, 1, 3, 0],
550-
[0, 2, 0, 4, 5, 5, 0],
547+
[0, 0, 2, 3, 1, 3],
548+
[0, 2, 0, 4, 5, 5],
551549
]).T.astype(index_dtype)
552550
shape = (4, 6)
553551
dim_names = ('x', 'y')
@@ -558,7 +556,7 @@ def test_sparse_coo_tensor_serialization(index_type, tensor_type):
558556
context = pa.default_serialization_context()
559557
serialized = pa.serialize(sparse_tensor, context=context).to_buffer()
560558
result = pa.deserialize(serialized)
561-
result.equals(sparse_tensor)
559+
assert_equal(result, sparse_tensor)
562560

563561
data_result, coords_result = result.to_numpy()
564562
assert np.array_equal(data_result, data)
@@ -572,10 +570,10 @@ def test_sparse_coo_tensor_components_serialization(large_buffer,
572570
index_type, tensor_type):
573571
tensor_dtype = np.dtype(tensor_type)
574572
index_dtype = np.dtype(index_type)
575-
data = np.array([[1, 2, 3, 4, 5, 6, 7]]).T.astype(tensor_dtype)
573+
data = np.array([[1, 2, 3, 4, 5, 6]]).T.astype(tensor_dtype)
576574
coords = np.array([
577-
[0, 0, 2, 3, 1, 3, 0],
578-
[0, 2, 0, 4, 5, 5, 0],
575+
[0, 0, 2, 3, 1, 3],
576+
[0, 2, 0, 4, 5, 5],
579577
]).T.astype(index_dtype)
580578
shape = (4, 6)
581579
dim_names = ('x', 'y')
@@ -587,9 +585,9 @@ def test_sparse_coo_tensor_components_serialization(large_buffer,
587585

588586
@pytest.mark.skipif(not coo_matrix, reason="requires scipy")
589587
def test_scipy_sparse_coo_tensor_serialization():
590-
data = np.array([1, 2, 3, 4, 5, 6, 7])
591-
row = np.array([0, 0, 2, 3, 1, 3, 0])
592-
col = np.array([0, 2, 0, 4, 5, 5, 0])
588+
data = np.array([1, 2, 3, 4, 5, 6])
589+
row = np.array([0, 0, 2, 3, 1, 3])
590+
col = np.array([0, 2, 0, 4, 5, 5])
593591
shape = (4, 6)
594592

595593
sparse_array = coo_matrix((data, (row, col)), shape=shape)

python/pyarrow/tests/test_sparse_tensor.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,10 @@ def test_sparse_csr_matrix_from_dense(dtype_str, arrow_type):
206206
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
207207
def test_sparse_coo_tensor_numpy_roundtrip(dtype_str, arrow_type):
208208
dtype = np.dtype(dtype_str)
209-
data = np.array([[1, 2, 3, 4, 5, 6, 7]]).T.astype(dtype)
209+
data = np.array([[1, 2, 3, 4, 5, 6]]).T.astype(dtype)
210210
coords = np.array([
211-
[0, 0, 2, 3, 1, 3, 0],
212-
[0, 2, 0, 4, 5, 5, 0],
211+
[0, 0, 2, 3, 1, 3],
212+
[0, 2, 0, 4, 5, 5],
213213
]).T
214214
shape = (4, 6)
215215
dim_names = ('x', 'y')
@@ -271,9 +271,9 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type):
271271
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
272272
def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type):
273273
dtype = np.dtype(dtype_str)
274-
data = np.array([1, 2, 3, 4, 5, 6, 7]).astype(dtype)
275-
row = np.array([0, 0, 2, 3, 1, 3, 0])
276-
col = np.array([0, 2, 0, 4, 5, 5, 0])
274+
data = np.array([1, 2, 3, 4, 5, 6]).astype(dtype)
275+
row = np.array([0, 0, 2, 3, 1, 3])
276+
col = np.array([0, 2, 0, 4, 5, 5])
277277
shape = (4, 6)
278278
dim_names = ('x', 'y')
279279

@@ -288,6 +288,10 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type):
288288
assert np.array_equal(sparse_array.data, out_sparse_array.data)
289289
assert np.array_equal(sparse_array.row, out_sparse_array.row)
290290
assert np.array_equal(sparse_array.col, out_sparse_array.col)
291+
if dtype_str != 'f2':
292+
assert np.array_equal(
293+
sparse_array.astype(np.float32).toarray().astype(np.float16),
294+
sparse_tensor.to_tensor().to_numpy())
291295

292296

293297
@pytest.mark.skipif(not csr_matrix, reason="requires scipy")
@@ -311,3 +315,7 @@ def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type):
311315
assert np.array_equal(sparse_array.data, out_sparse_array.data)
312316
assert np.array_equal(sparse_array.indptr, out_sparse_array.indptr)
313317
assert np.array_equal(sparse_array.indices, out_sparse_array.indices)
318+
if dtype_str != 'f2':
319+
assert np.array_equal(
320+
sparse_array.astype(np.float32).toarray().astype(np.float16),
321+
sparse_tensor.to_tensor().to_numpy())

0 commit comments

Comments
 (0)