Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 76 additions & 36 deletions src/pyzed/sl.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ from libc.string cimport memcpy
from cpython cimport bool
import enum

import cupy as cp

import numpy as np
cimport numpy as np
#https://github.com/cython/cython/wiki/tutorials-numpy#c-api-initialization
Expand Down Expand Up @@ -1512,6 +1514,7 @@ class SVO_COMPRESSION_MODE(enum.Enum):
# | CPU | Data will be stored on the CPU (processor side). |
class MEM(enum.Enum):
CPU = <int>c_MEM.CPU
GPU = <int>c_MEM.GPU

##
# Lists available copy operation on sl.Mat.
Expand Down Expand Up @@ -5094,68 +5097,105 @@ cdef class Mat:
# \return NumPy array containing the sl.Mat data.
# \note The fastest is \b deep_copy at False but the sl.Mat memory must not be released to use the NumPy array.
def get_data(self, memory_type=MEM.CPU, deep_copy=False) -> np.array:

shape = None

if not isinstance(memory_type, MEM):
raise TypeError("Argument is not of MEM type.")

if self.get_memory_type().value != memory_type.value:
raise ValueError("Provided MEM type doesn't match Mat's memory_type.")

cdef np.npy_intp cython_shape[3]
cython_shape[0] = <np.npy_intp> self.mat.getHeight()
cython_shape[1] = <np.npy_intp> self.mat.getWidth()
cython_shape[2] = <np.npy_intp> self.mat.getChannels()

if self.mat.getChannels() == 1:
shape = (self.mat.getHeight(), self.mat.getWidth())
else:
shape = (self.mat.getHeight(), self.mat.getWidth(), self.mat.getChannels())

cdef size_t size = 0
dtype = None
nptype = None
npdim = None
itemsize = None
if self.mat.getDataType() in (c_MAT_TYPE.U8_C1, c_MAT_TYPE.U8_C2, c_MAT_TYPE.U8_C3, c_MAT_TYPE.U8_C4):
size = self.mat.getHeight()*self.mat.getWidth()*self.mat.getChannels()
itemsize = 1
dtype = np.uint8
nptype = np.NPY_UINT8
elif self.mat.getDataType() in (c_MAT_TYPE.F32_C1, c_MAT_TYPE.F32_C2, c_MAT_TYPE.F32_C3, c_MAT_TYPE.F32_C4):
size = self.mat.getHeight()*self.mat.getWidth()*self.mat.getChannels()*sizeof(float)
itemsize = sizeof(float)
dtype = np.float32
nptype = np.NPY_FLOAT32
elif self.mat.getDataType() == c_MAT_TYPE.U16_C1:
size = self.mat.getHeight()*self.mat.getWidth()*self.mat.getChannels()*sizeof(ushort)
itemsize = sizeof(ushort)
dtype = np.ushort
nptype = np.NPY_UINT16
else:
raise RuntimeError("Unknown Mat data_type value: {0}".format(<int>self.mat.getDataType()))

shape = None
if self.mat.getChannels() == 1:
shape = (self.mat.getHeight(), self.mat.getWidth())
strides = (self.get_step_bytes(memory_type), self.get_pixel_bytes())
else:
shape = (self.mat.getHeight(), self.mat.getWidth(), self.mat.getChannels())
strides = (self.get_step_bytes(memory_type), self.get_pixel_bytes(), itemsize)

size = self.mat.getHeight()*self.get_step(memory_type)*self.mat.getChannels()*itemsize

if self.mat.getDataType() in (c_MAT_TYPE.U8_C1, c_MAT_TYPE.F32_C1, c_MAT_TYPE.U16_C1):
npdim = 2
else:
npdim = 3

cdef np.ndarray arr = np.empty(shape, dtype=dtype)

if isinstance(memory_type, MEM):
if deep_copy:
if self.mat.getDataType() == c_MAT_TYPE.U8_C1:
memcpy(<void*>arr.data, <void*>getPointerUchar1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C2:
memcpy(<void*>arr.data, <void*>getPointerUchar2(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C3:
memcpy(<void*>arr.data, <void*>getPointerUchar3(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C4:
memcpy(<void*>arr.data, <void*>getPointerUchar4(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U16_C1:
memcpy(<void*>arr.data, <void*>getPointerUshort1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C1:
memcpy(<void*>arr.data, <void*>getPointerFloat1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C2:
memcpy(<void*>arr.data, <void*>getPointerFloat2(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C3:
memcpy(<void*>arr.data, <void*>getPointerFloat3(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C4:
memcpy(<void*>arr.data, <void*>getPointerFloat4(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
else: # Thanks to BDO for the initial implementation!
arr = np.PyArray_SimpleNewFromData(npdim, cython_shape, nptype, <void*>getPointerUchar1(self.mat, <c_MEM>(<unsigned int>memory_type.value)))
else:
raise TypeError("Argument is not of MEM type.")
cdef np.ndarray nparr # Placeholder for the np.ndarray since memcpy on CPU only works on cdef types
arr = None # Could be either an `np.ndarray` or a `cp.ndarray`

if memory_type.value == MEM.CPU.value and deep_copy:
nparr = np.empty(shape, dtype=dtype)
if self.mat.getDataType() == c_MAT_TYPE.U8_C1:
memcpy(<void*>nparr.data, <void*>getPointerUchar1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C2:
memcpy(<void*>nparr.data, <void*>getPointerUchar2(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C3:
memcpy(<void*>nparr.data, <void*>getPointerUchar3(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U8_C4:
memcpy(<void*>nparr.data, <void*>getPointerUchar4(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.U16_C1:
memcpy(<void*>nparr.data, <void*>getPointerUshort1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C1:
memcpy(<void*>nparr.data, <void*>getPointerFloat1(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C2:
memcpy(<void*>nparr.data, <void*>getPointerFloat2(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C3:
memcpy(<void*>nparr.data, <void*>getPointerFloat3(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)
elif self.mat.getDataType() == c_MAT_TYPE.F32_C4:
memcpy(<void*>nparr.data, <void*>getPointerFloat4(self.mat, <c_MEM>(<unsigned int>memory_type.value)), size)

# This is the workaround since cdef statements couldn't be performed in sub-scopes
arr = nparr

elif memory_type.value == MEM.CPU.value and not deep_copy:
# Thanks to BDO for the initial implementation!
arr = np.PyArray_SimpleNewFromData(npdim, cython_shape, nptype, <void*>getPointerUchar1(self.mat, <c_MEM>(<unsigned int>memory_type.value)))

##
# Ref: https://stackoverflow.com/questions/71344734/cupy-array-construction-from-existing-gpu-pointer
# Ref: https://docs.cupy.dev/en/stable/reference/generated/cupy.cuda.runtime.memcpy.html
# Ref: https://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/CUDA_Toolkit_Reference_Manual.pdf
elif memory_type.value == MEM.GPU.value and deep_copy:
in_mem_shape = (self.mat.getHeight(), self.get_step(memory_type), self.mat.getChannels())
dst_arr = cp.empty(in_mem_shape, dtype=dtype)
dst_ptr = dst_arr.data.ptr
src_ptr = self.get_pointer(memory_type=memory_type)
TRANSFER_KIND_GPU_GPU = 3
cp.cuda.runtime.memcpy(dst_ptr, src_ptr, size, TRANSFER_KIND_GPU_GPU)
arr = cp.ndarray(shape, dtype=dtype, memptr=dst_arr.data, strides=strides)

##
# Ref: https://github.com/cupy/cupy/issues/4644
# Ref: https://docs.cupy.dev/en/stable/user_guide/interoperability.html#device-memory-pointers
elif memory_type.value == MEM.GPU.value and not deep_copy:
src_ptr = self.get_pointer(memory_type=memory_type)
mem = cp.cuda.UnownedMemory(src_ptr, size, self)
memptr = cp.cuda.MemoryPointer(mem, offset=0)
arr = cp.ndarray(shape, dtype=dtype, memptr=memptr, strides=strides)

return arr

Expand Down
1 change: 1 addition & 0 deletions src/pyzed/sl_c.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,7 @@ cdef extern from "sl/Camera.hpp" namespace "sl":

ctypedef enum MEM 'sl::MEM':
CPU 'sl::MEM::CPU'
GPU 'sl::MEM::GPU'

MEM operator|(MEM a, MEM b)

Expand Down