This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-779] Add DLPack Transformation API #12047

Merged: 31 commits, Sep 22, 2018
Commits
822706e  add dlpack convertor api (wkcn, Aug 3, 2018)
8aac3da  Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (wkcn, Aug 3, 2018)
ab6fa85  add to_dlpack and from_dlpack for NDArray (wkcn, Aug 6, 2018)
8c6e9d2  fix dlpack deleter and add unittest for dlpack (wkcn, Aug 6, 2018)
9fdfa7d  Merge branch 'master' of https://github.com/apache/incubator-mxnet in… (wkcn, Aug 6, 2018)
1142787  update 3rdparty (wkcn, Aug 6, 2018)
16df8d5  fix for cpplint (wkcn, Aug 6, 2018)
bfcffa2  fix pylint and add destructor for dlpack (wkcn, Aug 6, 2018)
f5c2552  fix pylint in base.py (wkcn, Aug 6, 2018)
98b5d11  fix lint in base.py (wkcn, Aug 6, 2018)
7bdde8f  add document for DLPack transformation API (wkcn, Aug 6, 2018)
f225d27  add to_dlpack_for_read and to_dlpack_for_write (wkcn, Aug 7, 2018)
afc1518  fix lint for ndarray.py and fix typo in c_api.h (wkcn, Aug 7, 2018)
8b397fd  fix function name error in c_api (wkcn, Aug 7, 2018)
d48074a  update code indent in tensor_blob.h ans c_api.cc, remove unused type … (wkcn, Aug 7, 2018)
58c5d87  use MXNDArrayToDLPack in c_api and add compactness check in TBlob (wkcn, Aug 9, 2018)
72edbf8  merge master and fix merge conflict (wkcn, Aug 11, 2018)
ef8ffcd  use python function as destructor of DLPack (wkcn, Aug 11, 2018)
afa1898  remove unused PyObjectHandle and update DLDataTypeTransform (wkcn, Aug 11, 2018)
a4d3aee  update from_dlpack code (wkcn, Aug 11, 2018)
493deb0  fix pylint in ndarray.py (wkcn, Aug 11, 2018)
adf36ef  rename dlpack after using it (wkcn, Aug 12, 2018)
26db4d0  merge master (wkcn, Aug 13, 2018)
dec838d  DLManagedTensor manages itself (wkcn, Aug 22, 2018)
850c3dc  add deleter for TBlob and Chunk in NDArray (wkcn, Aug 22, 2018)
fc99323  remove used code in python/mxnet/base.py (wkcn, Aug 22, 2018)
ffe60c6  retrigger CI (wkcn, Aug 22, 2018)
cbb17c3  add deleter for shared_ptr<Chunk> (wkcn, Sep 10, 2018)
e56be1f  Merge branch 'master' into DLPack-convertor-API (wkcn, Sep 10, 2018)
b1204bc  compilation okay (wkcn, Sep 10, 2018)
fe1387f  fix cpplint (wkcn, Sep 10, 2018)
36 changes: 36 additions & 0 deletions include/mxnet/c_api.h
@@ -93,6 +93,8 @@ typedef void *CudaModuleHandle;
typedef void *CudaKernelHandle;
/*! \brief handle to a Profile object (domain, duration, counter, etc.) */
typedef void *ProfileHandle;
/*! \brief handle to DLManagedTensor */
typedef void *DLManagedTensorHandle;

typedef void (*ExecutorMonitorCallback)(const char*,
NDArrayHandle,
@@ -746,6 +748,40 @@ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle,
*/
MXNET_DLL int MXNDArrayGetData(NDArrayHandle handle,
void **out_pdata);
/*!
* \brief Create a reference view of an NDArray that is
* represented as a DLManagedTensor.
* Note: MXNet uses asynchronous execution. Please call MXNDArrayWaitToRead or
* MXNDArrayWaitToWrite before calling MXNDArrayToDLPack.
* \param handle the handle to the ndarray
* \param out_dlpack pointer holder to get pointer of DLManagedTensor
* \return 0 when success, -1 when failure happens
*/
MXNET_DLL int MXNDArrayToDLPack(NDArrayHandle handle,
DLManagedTensorHandle *out_dlpack);

/*!
* \brief Create an NDArray backed by a DLPack tensor.
*
* This allows us to create an NDArray using the memory
* allocated by an external deep learning framework
* that is DLPack-compatible.
*
* The memory is retained until the NDArray goes out of scope.
*
* \param dlpack the pointer of the input DLManagedTensor
* \param out_handle pointer holder to get pointer of NDArray
* \return 0 when success, -1 when failure happens
*/
MXNET_DLL int MXNDArrayFromDLPack(DLManagedTensorHandle dlpack,
NDArrayHandle *out_handle);
/*!
* \brief Delete a dlpack tensor
* \param dlpack the pointer of the input DLManagedTensor
* \return 0 when success, -1 when failure happens
*/
MXNET_DLL int MXNDArrayCallDLPackDeleter(DLManagedTensorHandle dlpack);

/*!
* \brief get the type of the data in NDArray
* \param handle the handle to the ndarray
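Taken together, the three new entry points form a zero-copy round trip. Below is a minimal sketch of the intended call sequence, not code from this PR: it assumes `in` is a valid NDArrayHandle, that MXNDArrayFromDLPack takes ownership of the tensor (so its deleter is not invoked manually), and that every return code would be checked for 0 in real code.

```cpp
#include <mxnet/c_api.h>

// Hedged sketch of the round trip through the new C API.
void DLPackRoundTrip(NDArrayHandle in) {
  DLManagedTensorHandle dlpack = nullptr;
  NDArrayHandle out = nullptr;

  MXNDArrayWaitToRead(in);            // sync pending writes, as the docs require
  MXNDArrayToDLPack(in, &dlpack);     // zero-copy view as a DLManagedTensor
  MXNDArrayFromDLPack(dlpack, &out);  // wrap the same memory in a new NDArray
  MXNDArrayFree(out);                 // memory is released once `out` is freed
  // If `dlpack` were never handed to a consumer, MXNDArrayCallDLPackDeleter(dlpack)
  // would dispose of it instead.
}
```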
34 changes: 29 additions & 5 deletions include/mxnet/ndarray.h
@@ -519,6 +519,26 @@ class NDArray {
return ret;
}

/*!
* \brief Create a reference view of the NDArray,
* represented as a DLManagedTensor.
* \return A DLManagedTensor
*/
DLManagedTensor* ToDLPack() const;

/*!
* \brief Create an NDArray backed by a DLPack tensor.
*
* This allows us to create an NDArray using the memory
* allocated by an external deep learning framework
* that is DLPack-compatible.
*
* The memory is retained until the NDArray goes out of scope.
*
* \return The created NDArray view.
*/
static NDArray FromDLPack(const DLManagedTensor* tensor);

/*!
* \brief Update ndarray chunk storage handles using existing ndarray storage handles
* Also update the aux_handle, aux_shapes and aux_types.
@@ -765,13 +785,15 @@ class NDArray {
// The shape of aux data. The default value for the shape depends on the type of storage.
// If aux_shapes[i].Size() is zero, aux data i is empty.
std::vector<TShape> aux_shapes;
// The deleter function (nullptr if not set)
std::function<void()> deleter_;

/*! \brief default constructor */
-    Chunk() : static_data(true), delay_alloc(false) {}
+    Chunk() : static_data(true), delay_alloc(false), deleter_(nullptr) {}

/*! \brief construct a new chunk */
Chunk(TShape shape, Context ctx_, bool delay_alloc_, int dtype)
-        : static_data(false), delay_alloc(true), ctx(ctx_) {
+        : static_data(false), delay_alloc(true), ctx(ctx_), deleter_(nullptr) {
auto size = shape.Size();
storage_shape = shape;
var = Engine::Get()->NewVariable();
@@ -795,10 +817,11 @@
shandle.dptr = data.dptr_;
shandle.size = data.shape_.Size() * mshadow::mshadow_sizeof(data.type_flag_);
storage_shape = data.shape_;
deleter_ = data.deleter_;
}

Chunk(int shared_pid, int shared_id, const TShape& shape, int dtype)
-        : static_data(false), delay_alloc(false) {
+        : static_data(false), delay_alloc(false), deleter_(nullptr) {
var = Engine::Get()->NewVariable();
ctx = Context::CPUShared(0);
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
@@ -814,7 +837,8 @@
const std::vector<TShape> &aux_shapes_)
: static_data(false), delay_alloc(delay_alloc_), storage_type(storage_type_),
aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_),
-          aux_shapes(aux_shapes_) {
+          aux_shapes(aux_shapes_),
+          deleter_(nullptr) {
shandle.ctx = ctx;
var = Engine::Get()->NewVariable();
// aux_handles always reflect the correct number of aux data
@@ -831,7 +855,7 @@

Chunk(const NDArrayStorageType storage_type_, const TBlob &data,
const std::vector<TBlob> &aux_data, int dev_id)
-        : static_data(true), delay_alloc(false), storage_type(storage_type_) {
+        : static_data(true), delay_alloc(false), storage_type(storage_type_), deleter_(nullptr) {
Review comment (Member):

We cannot plug a deleter into TBlob and Chunk; this seems like it will cause more problems than it solves.

std::shared_ptr already supports a customized deleter, and maybe that can be used to help solve this issue of having to hide a customized deleter inside NDArray.

using namespace mshadow;
CHECK_NE(storage_type, kDefaultStorage);
// init var
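The shared_ptr alternative the reviewer mentions could look roughly like the sketch below. This is illustrative only, not code from this PR; `WrapDLPack` is a hypothetical helper.

```cpp
#include <memory>
#include <dlpack/dlpack.h>

// Sketch of the reviewer's alternative: instead of threading a deleter_
// member through TBlob and Chunk, let a shared_ptr's custom deleter invoke
// the producer's DLPack deleter when the last reference drops.
inline std::shared_ptr<DLManagedTensor> WrapDLPack(DLManagedTensor* tensor) {
  return std::shared_ptr<DLManagedTensor>(tensor, [](DLManagedTensor* t) {
    if (t != nullptr && t->deleter != nullptr) {
      t->deleter(t);  // hand the memory back to the producing framework
    }
  });
}
```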
77 changes: 73 additions & 4 deletions include/mxnet/tensor_blob.h
@@ -72,11 +72,14 @@ class TBlob {
TShape shape_;
/*! \brief type flag of the tensor blob */
int type_flag_;
// The deleter function (nullptr if not set)
std::function<void()> deleter_;

/*! \brief default constructor, default copy assign will work */
TBlob(void)
: dptr_(NULL),
-        type_flag_(mshadow::DataType<real_t>::kFlag) {
+        type_flag_(mshadow::DataType<real_t>::kFlag),
+        deleter_(nullptr) {
SetDLTensor(cpu::kDevMask, 0);
}
/*!
@@ -89,7 +92,8 @@
template<typename DType>
TBlob(DType *dptr, const TShape &shape, int dev_mask, int dev_id = -1)
: dptr_(dptr), shape_(shape),
-        type_flag_(mshadow::DataType<DType>::kFlag) {
+        type_flag_(mshadow::DataType<DType>::kFlag),
+        deleter_(nullptr) {
Review comment (Member):

We cannot introduce a deleter to the TBlob.

SetDLTensor(dev_mask, dev_id);
}
/*!
@@ -101,9 +105,44 @@
* \param dev_id the device id
*/
TBlob(void *dptr, const TShape &shape, int dev_mask, int type_flag, int dev_id = -1)
-      : dptr_(dptr), shape_(shape), type_flag_(type_flag) {
+      : dptr_(dptr), shape_(shape), type_flag_(type_flag),
+        deleter_(nullptr) {
SetDLTensor(dev_mask, dev_id);
}
/*!
* \brief constructor that constructs a TBlob from a DLTensor
* \param dltensor the DLTensor object
* \param deleter the deleter function to call when the memory is released
*/
explicit TBlob(const DLTensor &dltensor, const std::function<void()>& deleter = nullptr)
: dptr_(dltensor.data),
shape_(TShape(dltensor.shape, dltensor.shape + dltensor.ndim)),
type_flag_(DLDataTypeTransform(dltensor.dtype)),
deleter_(deleter),
dltensor_(dltensor) {
// compactness check for DLTensor
if (dltensor.strides != nullptr) {
// check strides
const int &ndim = dltensor.ndim;
const int64_t *shape = dltensor.shape;
const int64_t *strides = dltensor.strides;
if (ndim >= 1) {
bool err = false;
if (strides[ndim - 1] != 1) {
err = true;
} else {
for (int i = ndim - 2; i >= 0; --i) {
if (strides[i] != shape[i + 1] * strides[i + 1]) {
err = true;
break;
}
}
}
if (err) {
LOG(FATAL) << "Unsupported DLPack because MXNet only support compact tensor now";
}
}
}
}
/*!
* \brief constructor from tensor
* \param src source tensor
@@ -336,14 +375,44 @@
}
}
}
static int DLDataTypeTransform(DLDataType dldata_type) {
if (dldata_type.lanes != 1) {
LOG(FATAL) << "Unsupported DLDataType whose lanes != 1";
}
switch (dldata_type.code) {
case kDLFloat:
switch (dldata_type.bits) {
case 16: return mshadow::kFloat16;
case 32: return mshadow::kFloat32;
case 64: return mshadow::kFloat64;
}
break;
case kDLUInt:
switch (dldata_type.bits) {
case 8: return mshadow::kUint8;
}
break;
case kDLInt:
switch (dldata_type.bits) {
case 8: return mshadow::kInt8;
case 32: return mshadow::kInt32;
case 64: return mshadow::kInt64;
}
break;
}
LOG(FATAL) << "Unknown DLDataType{" << dldata_type.code
<< ", " << dldata_type.bits
<< ", " << dldata_type.lanes << "}";
return mshadow::kFloat32;
}

inline void SetDLTensor(int dev_mask, int dev_id) {
dltensor_.data = dptr_;
dltensor_.ctx = DLContext{static_cast<DLDeviceType>(dev_mask), dev_id};
dltensor_.ndim = shape_.ndim();
dltensor_.dtype = DTypeTransform(type_flag_);
dltensor_.shape = shape_.data();
-    dltensor_.strides = NULL;
+    dltensor_.strides = nullptr;
dltensor_.byte_offset = 0;
}

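The compactness check in the TBlob(DLTensor) constructor above amounts to requiring a dense row-major layout whenever explicit strides are present. A standalone restatement of that rule (an illustrative sketch; `IsCompact` is a hypothetical helper, not part of the PR):

```cpp
#include <cstdint>
#include <vector>

// A strided DLTensor is accepted only when it is dense and row-major:
// the innermost stride is 1 and each outer stride equals the product
// of the next dimension's shape and stride.
bool IsCompact(const std::vector<int64_t>& shape,
               const std::vector<int64_t>& strides) {
  const int ndim = static_cast<int>(shape.size());
  if (ndim == 0) return true;
  if (strides[ndim - 1] != 1) return false;  // innermost stride must be 1
  for (int i = ndim - 2; i >= 0; --i) {
    if (strides[i] != shape[i + 1] * strides[i + 1]) return false;
  }
  return true;
}
// Example: shape {2, 3} with strides {3, 1} is compact; strides {4, 1}
// (padded rows) or {1, 2} (column-major) would be rejected.
```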
4 changes: 4 additions & 0 deletions python/mxnet/base.py
@@ -235,6 +235,7 @@ def _load_lib():
CudaModuleHandle = ctypes.c_void_p
CudaKernelHandle = ctypes.c_void_p
ProfileHandle = ctypes.c_void_p
DLPackHandle = ctypes.c_void_p


#----------------------------
@@ -729,3 +730,6 @@ def write_all_str(module_file, module_all_list):
module_op_file.close()
write_all_str(module_internal_file, module_internal_all)
module_internal_file.close()

ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object
ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
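These two restype declarations are what let the Python layer pass DLManagedTensor pointers around as PyCapsules. For orientation, here is a sketch of the equivalent logic written against the CPython C-API; the capsule name "dltensor" and the rename-to-"used_dltensor" convention follow the DLPack protocol (cf. the "rename dlpack after using it" commit), while the helper names are hypothetical:

```cpp
#include <Python.h>
#include <dlpack/dlpack.h>

// Destructor for an unconsumed capsule: a consumer that takes ownership
// renames the capsule to "used_dltensor", so only still-valid "dltensor"
// capsules trigger the producer's deleter here.
static void DLPackCapsuleDestructor(PyObject* capsule) {
  if (PyCapsule_IsValid(capsule, "dltensor")) {
    auto* tensor = static_cast<DLManagedTensor*>(
        PyCapsule_GetPointer(capsule, "dltensor"));
    if (tensor != nullptr && tensor->deleter != nullptr) {
      tensor->deleter(tensor);
    }
  }
}

// Wrap a DLManagedTensor in a capsule, mirroring what the Python-side
// to_dlpack path does through ctypes.
PyObject* ToDLPackCapsule(DLManagedTensor* tensor) {
  return PyCapsule_New(tensor, "dltensor", DLPackCapsuleDestructor);
}
```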