@@ -97,8 +97,6 @@ static int64_t ValuesToBitmap(PyArrayObject* arr, uint8_t* bitmap) {
9797 int64_t null_count = 0 ;
9898
9999 Ndarray1DIndexer<T> values (arr);
100-
101- // TODO(wesm): striding
102100 for (int i = 0 ; i < values.size (); ++i) {
103101 if (traits::isnull (values[i])) {
104102 ++null_count;
@@ -125,37 +123,42 @@ static int64_t MaskToBitmap(PyArrayObject* mask, int64_t length, uint8_t* bitmap
125123 return null_count;
126124}
127125
128- template <int TYPE>
129- static int64_t ValuesToValidBytes (const void * data, int64_t length,
130- uint8_t * valid_bytes) {
126+ template <int TYPE, typename BuilderType>
127+ static Status AppendNdarrayToBuilder (PyArrayObject* array, BuilderType* builder) {
131128 typedef internal::npy_traits<TYPE> traits;
132129 typedef typename traits::value_type T;
133130
134- int64_t null_count = 0 ;
135- const T* values = reinterpret_cast <const T*>(data);
136-
137- // TODO(wesm): striding
138- for (int i = 0 ; i < length; ++i) {
139- valid_bytes[i] = !traits::isnull (values[i]);
140- if (traits::isnull (values[i])) null_count++;
131+ // TODO(wesm): Vector append when not strided
132+ Ndarray1DIndexer<T> values (array);
133+ if (traits::supports_nulls) {
134+ for (int64_t i = 0 ; i < values.size (); ++i) {
135+ if (traits::isnull (values[i])) {
136+ RETURN_NOT_OK (builder->AppendNull ());
137+ } else {
138+ RETURN_NOT_OK (builder->Append (values[i]));
139+ }
140+ }
141+ } else {
142+ for (int64_t i = 0 ; i < values.size (); ++i) {
143+ RETURN_NOT_OK (builder->Append (values[i]));
144+ }
141145 }
142-
143- return null_count;
146+ return Status::OK ();
144147}
145148
146149Status CheckFlatNumpyArray (PyArrayObject* numpy_array, int np_type) {
147150 if (PyArray_NDIM (numpy_array) != 1 ) {
148151 return Status::Invalid (" only handle 1-dimensional arrays" );
149152 }
150153
151- if (PyArray_DESCR (numpy_array)->type_num != np_type) {
152- return Status::Invalid (" can only handle exact conversions" );
154+ const int received_type = PyArray_DESCR (numpy_array)->type_num ;
155+ if (received_type != np_type) {
156+ std::stringstream ss;
157+ ss << " trying to convert NumPy type " << GetNumPyTypeName (np_type) << " but got "
158+ << GetNumPyTypeName (received_type);
159+ return Status::Invalid (ss.str ());
153160 }
154161
155- npy_intp* astrides = PyArray_STRIDES (numpy_array);
156- if (astrides[0 ] != PyArray_DESCR (numpy_array)->elsize ) {
157- return Status::Invalid (" No support for strided arrays in lists yet" );
158- }
159162 return Status::OK ();
160163}
161164
@@ -577,7 +580,7 @@ Status PandasConverter::ConvertDecimals() {
577580 RETURN_NOT_OK (ImportModule (" decimal" , &decimal));
578581 RETURN_NOT_OK (ImportFromModule (decimal, " Decimal" , &Decimal));
579582
580- PyObject** objects = reinterpret_cast <PyObject**>( PyArray_DATA ( arr_) );
583+ Ndarray1DIndexer <PyObject*> objects ( arr_);
581584 PyObject* object = objects[0 ];
582585
583586 int precision;
@@ -618,7 +621,7 @@ Status PandasConverter::ConvertTimes() {
618621 PyAcquireGIL lock;
619622 PyDateTime_IMPORT;
620623
621- PyObject** objects = reinterpret_cast <PyObject**>( PyArray_DATA ( arr_) );
624+ Ndarray1DIndexer <PyObject*> objects ( arr_);
622625
623626 // datetime.time stores microsecond resolution
624627 Time64Builder builder (::arrow::time64 (TimeUnit::MICRO), pool_);
@@ -906,7 +909,7 @@ Status LoopPySequence(PyObject* sequence, T func) {
906909 Py_ssize_t size = PySequence_Size (sequence);
907910 if (PyArray_Check (sequence)) {
908911 auto array = reinterpret_cast <PyArrayObject*>(sequence);
909- PyObject** objects = reinterpret_cast <PyObject**>( PyArray_DATA ( array) );
912+ Ndarray1DIndexer <PyObject*> objects ( array);
910913 for (int64_t i = 0 ; i < size; ++i) {
911914 RETURN_NOT_OK (func (objects[i]));
912915 }
@@ -934,7 +937,6 @@ template <int ITEM_TYPE, typename ArrowType>
934937inline Status PandasConverter::ConvertTypedLists (const std::shared_ptr<DataType>& type,
935938 ListBuilder* builder, PyObject* list) {
936939 typedef internal::npy_traits<ITEM_TYPE> traits;
937- typedef typename traits::value_type T;
938940 typedef typename traits::BuilderClass BuilderT;
939941
940942 PyAcquireGIL lock;
@@ -956,24 +958,13 @@ inline Status PandasConverter::ConvertTypedLists(const std::shared_ptr<DataType>
956958 // TODO(uwe): Support more complex numpy array structures
957959 RETURN_NOT_OK (CheckFlatNumpyArray (numpy_array, ITEM_TYPE));
958960
959- int64_t size = PyArray_DIM (numpy_array, 0 );
960- auto data = reinterpret_cast <const T*>(PyArray_DATA (numpy_array));
961- if (traits::supports_nulls) {
962- RETURN_NOT_OK (null_bitmap_->Resize (size, false ));
963- // TODO(uwe): A bitmap would be more space-efficient but the Builder API doesn't
964- // currently support this.
965- // ValuesToBitmap<ITEM_TYPE>(data, size, null_bitmap_->mutable_data());
966- ValuesToValidBytes<ITEM_TYPE>(data, size, null_bitmap_->mutable_data ());
967- return value_builder->Append (data, size, null_bitmap_->data ());
968- } else {
969- return value_builder->Append (data, size);
970- }
961+ return AppendNdarrayToBuilder<ITEM_TYPE, BuilderT>(numpy_array, value_builder);
971962 } else if (PyList_Check (object)) {
972963 int64_t size;
973964 std::shared_ptr<DataType> inferred_type;
974965 RETURN_NOT_OK (builder->Append (true ));
975966 RETURN_NOT_OK (InferArrowTypeAndSize (object, &size, &inferred_type));
976- if (inferred_type->id () != type->id ()) {
967+ if (inferred_type->id () != Type::NA && inferred_type-> id () != type->id ()) {
977968 std::stringstream ss;
978969 ss << inferred_type->ToString () << " cannot be converted to " << type->ToString ();
979970 return Status::TypeError (ss.str ());
@@ -1064,7 +1055,7 @@ inline Status PandasConverter::ConvertTypedLists<NPY_OBJECT, StringType>(
10641055 std::shared_ptr<DataType> inferred_type;
10651056 RETURN_NOT_OK (builder->Append (true ));
10661057 RETURN_NOT_OK (InferArrowTypeAndSize (object, &size, &inferred_type));
1067- if (inferred_type->id () != Type::STRING) {
1058+ if (inferred_type->id () != Type::NA && inferred_type-> id () != Type:: STRING) {
10681059 std::stringstream ss;
10691060 ss << inferred_type->ToString () << " cannot be converted to STRING." ;
10701061 return Status::TypeError (ss.str ());
0 commit comments