-
Notifications
You must be signed in to change notification settings - Fork 3.9k
GH-33984: [C++][Python] DLPack implementation for Arrow Arrays (producer) #38472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f8f430f
cf143cd
d43367f
5778a33
1f0e100
e23249e
3d8c581
82a270f
00ac266
aeb20a6
fadf1f9
72ffcf3
c1ec84e
eabc58f
f834d27
3556b07
57ceee0
78363df
22739e8
1bcf161
c8d8799
2a5bf42
21b95d8
1762544
fedd464
01997a6
f8dbb0b
c6ee1bb
bcd05ea
6626a91
d169d4c
11d48af
5742e1d
024f535
e6d927c
da9baf2
5e2bb80
3af44e1
1f81277
cb0a942
46206ec
0619c35
55246ea
0fa84de
010f28e
53f2867
f9fbf2c
ab88549
c832edd
0089e23
804878f
fea6fe3
8071c9b
9f21208
8a10e68
672043b
ac85f4e
7e0d5f8
0fc962c
0d81ae0
d232de4
602e7b5
d9b3182
015ac3d
d2eb7c8
15ead8f
5e87138
2160ecd
8bb7173
9389941
94dec4b
811d2b5
49a978f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| #include "arrow/c/dlpack.h" | ||
|
|
||
| #include "arrow/array/array_base.h" | ||
| #include "arrow/c/dlpack_abi.h" | ||
| #include "arrow/device.h" | ||
| #include "arrow/type.h" | ||
| #include "arrow/type_traits.h" | ||
|
|
||
| namespace arrow::dlpack { | ||
|
|
||
| namespace { | ||
|
|
||
| // Builds the DLPack DLDataType descriptor that corresponds to an Arrow type. | ||
| // | ||
| // Signed integer, unsigned integer and floating-point types are mapped to the | ||
| // kDLInt, kDLUInt and kDLFloat codes respectively. The bit count is taken from | ||
| // the Arrow type and the lane count is always 1. Every other type results in | ||
| // a TypeError. | ||
| Result<DLDataType> GetDLDataType(const DataType& type) { | ||
| // First pick the DLPack type code, bailing out early on unsupported types. | ||
| DLDataTypeCode type_code; | ||
| switch (type.id()) { | ||
| case Type::INT8: | ||
| case Type::INT16: | ||
| case Type::INT32: | ||
| case Type::INT64: | ||
| type_code = DLDataTypeCode::kDLInt; | ||
| break; | ||
| case Type::UINT8: | ||
| case Type::UINT16: | ||
| case Type::UINT32: | ||
| case Type::UINT64: | ||
| type_code = DLDataTypeCode::kDLUInt; | ||
| break; | ||
| case Type::HALF_FLOAT: | ||
| case Type::FLOAT: | ||
| case Type::DOUBLE: | ||
| type_code = DLDataTypeCode::kDLFloat; | ||
| break; | ||
| case Type::BOOL: | ||
| // DLPack supports byte-packed booleans, but this data type is bit-packed | ||
| return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); | ||
| default: | ||
| return Status::TypeError("DataType is not compatible with DLPack spec: ", | ||
| type.ToString()); | ||
| } | ||
|
|
||
| // Then assemble the descriptor for the supported type. | ||
| DLDataType descriptor; | ||
| descriptor.code = type_code; | ||
| descriptor.bits = type.bit_width(); | ||
| descriptor.lanes = 1; | ||
| return descriptor; | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we plan to later support
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
See #38868 for that |
||
|
|
||
| // Context object stored in DLManagedTensor::manager_ctx by ExportArray. | ||
| // It bundles the exported tensor with a reference to the array data it points | ||
| // into; the tensor's deleter deletes the whole context. | ||
| struct ManagerCtx { | ||
| // Reference to the exported array's data, held for the tensor's lifetime. | ||
| std::shared_ptr<ArrayData> array; | ||
| // The tensor handed out to the DLPack consumer. | ||
| DLManagedTensor tensor; | ||
| }; | ||
|
|
||
| } // namespace | ||
|
|
||
| // Exports `arr` as a one-dimensional DLManagedTensor. | ||
| // | ||
| // The returned tensor points directly into the array's data buffer (no copy). | ||
| // A heap-allocated ManagerCtx holds a reference to the ArrayData and is | ||
| // deleted when the tensor's `deleter` is called. | ||
| // | ||
| // Returns an error if ExportDevice or GetDLDataType reject the array. | ||
| Result<DLManagedTensor*> ExportArray(const std::shared_ptr<Array>& arr) { | ||
| // Obtain the DLDevice and check that the array is supported by the DLPack | ||
| // protocol at the same time. ExportDevice returns a TypeError for arrays | ||
| // with nulls or with a type other than integer / floating point, and | ||
| // NotImplemented when the data is not on the CPU. | ||
| ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)); | ||
|
|
||
| // Define the DLDataType struct | ||
| const DataType& type = *arr->type(); | ||
| std::shared_ptr<ArrayData> data = arr->data(); | ||
| ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(type)); | ||
|
|
||
| // Create ManagerCtx that will serve as the owner of the DLManagedTensor. | ||
| // A plain `new` is used because ownership is released to the caller at the | ||
| // end of this function and reclaimed by the `delete` in the deleter below. | ||
| std::unique_ptr<ManagerCtx> ctx(new ManagerCtx); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: I think it's best practice to use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Happy to change to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that we have the explicit
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unless that |
||
|
|
||
| // Define the data pointer to the DLTensor | ||
| // If array is of length 0, data pointer should be null | ||
pitrou marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (arr->length() == 0) { | ||
| ctx->tensor.dl_tensor.data = nullptr; | ||
| } else { | ||
| // The array offset is folded into the data pointer itself, which is why | ||
| // byte_offset is left at 0 below. | ||
| const auto data_offset = data->offset * type.byte_width(); | ||
| ctx->tensor.dl_tensor.data = | ||
| const_cast<uint8_t*>(data->buffers[1]->data() + data_offset); | ||
| } | ||
|
|
||
| ctx->tensor.dl_tensor.device = device; | ||
| ctx->tensor.dl_tensor.ndim = 1; | ||
| ctx->tensor.dl_tensor.dtype = dlpack_type; | ||
| // The one-element shape aliases ArrayData::length; the ArrayData object is | ||
| // kept alive by ctx->array, so the pointer stays valid. | ||
| ctx->tensor.dl_tensor.shape = const_cast<int64_t*>(&data->length); | ||
| ctx->tensor.dl_tensor.strides = nullptr; | ||
| ctx->tensor.dl_tensor.byte_offset = 0; | ||
|
|
||
| ctx->array = std::move(data); | ||
| ctx->tensor.manager_ctx = ctx.get(); | ||
| ctx->tensor.deleter = [](struct DLManagedTensor* self) { | ||
| // manager_ctx is the ManagerCtx allocated above, stored as void* | ||
| delete static_cast<ManagerCtx*>(self->manager_ctx); | ||
| }; | ||
| // Hand ownership of the context over to the returned tensor. | ||
| return &ctx.release()->tensor; | ||
| } | ||
|
|
||
| // Validates that `arr` can be exported through DLPack and returns the | ||
| // DLDevice describing where its data lives. | ||
| // | ||
| // Fails with TypeError for arrays that contain nulls, boolean arrays, and | ||
| // arrays whose type is neither integer nor floating point. Fails with | ||
| // NotImplemented when the data buffer is not on the CPU. | ||
| Result<DLDevice> ExportDevice(const std::shared_ptr<Array>& arr) { | ||
| // Arrays with nulls cannot be exported. | ||
| if (arr->null_count() > 0) { | ||
| return Status::TypeError("Can only use DLPack on arrays with no nulls."); | ||
| } | ||
|
|
||
| // Only integer and floating-point types are accepted; booleans get their | ||
| // own, more specific, error message. | ||
| const DataType& array_type = *arr->type(); | ||
| const auto type_id = array_type.id(); | ||
| if (type_id == Type::BOOL) { | ||
| return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); | ||
| } | ||
| const bool is_numeric = is_integer(type_id) || is_floating(type_id); | ||
| if (!is_numeric) { | ||
| return Status::TypeError("DataType is not compatible with DLPack spec: ", | ||
| array_type.ToString()); | ||
| } | ||
|
|
||
| // Reject data that does not live on the CPU. | ||
| const auto& values_buffer = arr->data()->buffers[1]; | ||
| if (values_buffer->device_type() != DeviceAllocationType::kCPU) { | ||
AlenkaF marked this conversation as resolved.
Show resolved
Hide resolved
AlenkaF marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return Status::NotImplemented( | ||
| "DLPack support is implemented only for buffers on CPU device."); | ||
| } | ||
|
|
||
| // Define DLDevice struct for CPU data | ||
| DLDevice cpu_device; | ||
| cpu_device.device_id = 0; | ||
| cpu_device.device_type = DLDeviceType::kDLCPU; | ||
| return cpu_device; | ||
| } | ||
|
|
||
| } // namespace arrow::dlpack | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "arrow/array/array_base.h" | ||
| #include "arrow/c/dlpack_abi.h" | ||
|
|
||
| namespace arrow::dlpack { | ||
|
|
||
| /// \brief Export Arrow array as DLPack tensor. | ||
| /// | ||
| /// DLManagedTensor is produced as defined by the DLPack protocol, | ||
| /// see https://dmlc.github.io/dlpack/latest/. | ||
| /// | ||
| /// Data types for which the protocol is supported are | ||
| /// integer and floating-point data types. | ||
| /// | ||
| /// DLPack protocol only supports arrays with one contiguous | ||
| /// memory region which means Arrow Arrays with validity buffers | ||
| /// are not supported. | ||
| /// | ||
| /// The returned tensor holds a reference to the array's data, which | ||
| /// is released when the tensor's `deleter` callback is called. | ||
| /// | ||
| /// \param[in] arr Arrow array | ||
| /// \return pointer to the DLManagedTensor struct, or an error: | ||
| /// TypeError if the array contains nulls or has an unsupported data | ||
| /// type, NotImplemented if the data is not on a CPU device | ||
| ARROW_EXPORT | ||
| Result<DLManagedTensor*> ExportArray(const std::shared_ptr<Array>& arr); | ||
|
|
||
| /// \brief Get DLDevice with enumerator specifying the | ||
| /// type of the device data is stored on and index of the | ||
| /// device which is 0 by default for CPU. | ||
| /// | ||
| /// The array is also checked for DLPack compatibility: a TypeError | ||
| /// is returned if it contains nulls or if its data type is not an | ||
| /// integer or floating-point type, and NotImplemented is returned | ||
| /// if its data is not on a CPU device. | ||
| /// | ||
| /// \param[in] arr Arrow array | ||
| /// \return DLDevice struct | ||
| ARROW_EXPORT | ||
| Result<DLDevice> ExportDevice(const std::shared_ptr<Array>& arr); | ||
|
|
||
| } // namespace arrow::dlpack |
Uh oh!
There was an error while loading. Please reload this page.