Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 188 additions & 13 deletions coremltools/modelrunner/ModelRunner/ModelService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import Foundation
import CoreML
import CoreVideo
import Darwin


Expand Down Expand Up @@ -38,11 +39,16 @@ extension LoadedModel {
/// or other characteristics that are not supported by the current system.
case unsupportedOutput(details: String)

/// Indicates that image conversion failed.
case imageConversionFailed(details: String)

/// A human-readable description of the error.
var description: String {
switch self {
case .unsupportedOutput(let details):
return "Unsupported output: \(details)"
case .imageConversionFailed(let details):
return "Image conversion failed: \(details)"
}
}
}
Expand All @@ -57,6 +63,153 @@ extension LoadedModel {
return try mlModel.prediction(from: inputs)
}

/// Converts a raw `UInt8` tensor, described by `descriptor`, into a freshly
/// allocated `CVPixelBuffer` suitable for a Core ML image input.
///
/// - Parameters:
///   - descriptor: Layout of the source tensor. The shape must be `[H, W]`
///     (grayscale) or `[H, W, C]` with C in {1, 3, 4}. Pixel bytes are read
///     starting at `descriptor.storage.offset` within `rawPtr`.
///     NOTE(review): assumes the source data is tightly packed uint8 with no
///     row padding — confirm callers guarantee this.
///   - rawPtr: Base pointer of the shared input storage blob.
///   - imageConstraint: The model's image constraint for this input.
///     NOTE(review): currently unused — the produced buffer's size and format
///     are never validated against it; confirm whether validation is wanted.
/// - Returns: A pixel buffer in `kCVPixelFormatType_OneComponent8` (1 channel)
///   or `kCVPixelFormatType_32BGRA` (3 or 4 channels).
/// - Throws: `Error.imageConversionFailed` for unsupported shapes / channel
///   counts, or when CoreVideo fails to allocate or map the buffer.
fileprivate func createPixelBuffer(
    descriptor: TensorDescriptor,
    rawPtr: UnsafeRawPointer,
    imageConstraint: MLImageConstraint
) throws -> CVPixelBuffer {
    let shape = descriptor.shape
    // Start of this tensor's bytes within the shared storage blob.
    let dataPtr = rawPtr.advanced(by: descriptor.storage.offset)

    let height: Int
    let width: Int
    let channels: Int

    // Decode the tensor shape: [H, W, C] or [H, W] (implicit single channel).
    if shape.count == 3 {
        height = shape[0]
        width = shape[1]
        channels = shape[2]
    } else if shape.count == 2 {
        height = shape[0]
        width = shape[1]
        channels = 1
    } else {
        throw Error.imageConversionFailed(
            details: "Invalid tensor shape for image input. Expected [H, W] or [H, W, C], got \(shape)"
        )
    }

    // Pick the CoreVideo pixel format. Both 3- and 4-channel inputs are
    // expanded into 32BGRA (alpha is synthesized for RGB below).
    let pixelFormat: OSType
    if channels == 1 {
        pixelFormat = kCVPixelFormatType_OneComponent8
    } else if channels == 3 || channels == 4 {
        pixelFormat = kCVPixelFormatType_32BGRA
    } else {
        throw Error.imageConversionFailed(
            details: "Unsupported number of channels: \(channels). Expected 1, 3, or 4."
        )
    }

    var pixelBuffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(
        kCFAllocatorDefault,
        width,
        height,
        pixelFormat,
        nil,  // no extra buffer attributes
        &pixelBuffer
    )

    guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
        throw Error.imageConversionFailed(
            details: "Failed to create CVPixelBuffer. Status: \(status)"
        )
    }

    // The base address is only valid while the buffer is locked; the defer
    // guarantees we unlock on every exit path, including thrown errors.
    CVPixelBufferLockBaseAddress(buffer, [])
    defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
        throw Error.imageConversionFailed(details: "Failed to get pixel buffer base address.")
    }

    // CoreVideo may pad each destination row, so copies below go row by row
    // using bytesPerRow rather than a single memcpy of the whole plane.
    let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)

    if channels == 1 {
        // Grayscale: straight row-wise copy (source rows are `width` bytes).
        let srcPtr = dataPtr.assumingMemoryBound(to: UInt8.self)
        let dstPtr = baseAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            memcpy(dstPtr.advanced(by: y * bytesPerRow),
                   srcPtr.advanced(by: y * width),
                   width)
        }
    } else if channels == 3 {
        // RGB -> BGRA: swap R/B, synthesize opaque alpha (255).
        let srcPtr = dataPtr.assumingMemoryBound(to: UInt8.self)
        let dstPtr = baseAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            for x in 0..<width {
                let srcOffset = (y * width + x) * 3
                let dstOffset = y * bytesPerRow + x * 4
                dstPtr[dstOffset + 0] = srcPtr[srcOffset + 2]
                dstPtr[dstOffset + 1] = srcPtr[srcOffset + 1]
                dstPtr[dstOffset + 2] = srcPtr[srcOffset + 0]
                dstPtr[dstOffset + 3] = 255
            }
        }
    } else if channels == 4 {
        // RGBA -> BGRA: swap R/B, keep the source alpha.
        let srcPtr = dataPtr.assumingMemoryBound(to: UInt8.self)
        let dstPtr = baseAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            for x in 0..<width {
                let srcOffset = (y * width + x) * 4
                let dstOffset = y * bytesPerRow + x * 4
                dstPtr[dstOffset + 0] = srcPtr[srcOffset + 2]
                dstPtr[dstOffset + 1] = srcPtr[srcOffset + 1]
                dstPtr[dstOffset + 2] = srcPtr[srcOffset + 0]
                dstPtr[dstOffset + 3] = srcPtr[srcOffset + 3]
            }
        }
    }

    return buffer
}

/// Copies the contents of `pixelBuffer` back into a tightly packed byte tensor.
///
/// Supports the two formats produced by `createPixelBuffer`:
/// - `kCVPixelFormatType_OneComponent8` -> shape `[H, W]`, grayscale bytes.
/// - `kCVPixelFormatType_32BGRA` -> shape `[H, W, 3]`, converted back to RGB
///   (the alpha channel is dropped).
///
/// - Parameter pixelBuffer: Buffer to read; it is locked read-only for the
///   duration of the call.
/// - Returns: A named tuple of the packed bytes, the tensor shape, and the
///   element strides (naming the fields is source-compatible with callers
///   that destructure positionally).
/// - Throws: `Error.imageConversionFailed` if a base address is unavailable
///   or the pixel format is unsupported.
fileprivate func extractPixelBufferData(
    _ pixelBuffer: CVPixelBuffer
) throws -> (data: Data, shape: [Int], strides: [Int]) {
    let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)

    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
        throw Error.imageConversionFailed(details: "Failed to get pixel buffer base address.")
    }

    // Source rows may be padded, so all copies honor bytesPerRow.
    let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
    let srcPtr = baseAddress.assumingMemoryBound(to: UInt8.self)

    if pixelFormat == kCVPixelFormatType_OneComponent8 {
        var data = Data(count: height * width)
        try data.withUnsafeMutableBytes { (dstBuffer: UnsafeMutableRawBufferPointer) in
            // Guard instead of force-unwrapping: baseAddress is nil for
            // zero-length buffers.
            guard let dstBase = dstBuffer.baseAddress else {
                throw Error.imageConversionFailed(details: "Failed to get destination buffer base address.")
            }
            let dstPtr = dstBase.assumingMemoryBound(to: UInt8.self)
            // Row-wise copy strips any per-row padding.
            for y in 0..<height {
                memcpy(dstPtr.advanced(by: y * width), srcPtr.advanced(by: y * bytesPerRow), width)
            }
        }
        return (data: data, shape: [height, width], strides: [width, 1])
    } else if pixelFormat == kCVPixelFormatType_32BGRA {
        var data = Data(count: height * width * 3)
        try data.withUnsafeMutableBytes { (dstBuffer: UnsafeMutableRawBufferPointer) in
            guard let dstBase = dstBuffer.baseAddress else {
                throw Error.imageConversionFailed(details: "Failed to get destination buffer base address.")
            }
            let dstPtr = dstBase.assumingMemoryBound(to: UInt8.self)
            // BGRA -> RGB: swap B/R, drop alpha.
            for y in 0..<height {
                for x in 0..<width {
                    let srcOffset = y * bytesPerRow + x * 4
                    let dstOffset = (y * width + x) * 3
                    dstPtr[dstOffset + 0] = srcPtr[srcOffset + 2]
                    dstPtr[dstOffset + 1] = srcPtr[srcOffset + 1]
                    dstPtr[dstOffset + 2] = srcPtr[srcOffset + 0]
                }
            }
        }
        return (data: data, shape: [height, width, 3], strides: [width * 3, 3, 1])
    } else {
        throw Error.imageConversionFailed(details: "Unsupported pixel format: \(pixelFormat)")
    }
}

fileprivate func predict(inputs: [String: TensorDescriptor], storage: Data) throws -> (outputs: [String: TensorDescriptor], storage: Data, duration: UInt64) {
var outputs = [String : TensorDescriptor]()
var buffer = Data()
Expand All @@ -68,8 +221,19 @@ extension LoadedModel {

var modelInputs = [String : MLFeatureValue]()
for (name, descriptor) in inputs {
let multiArray = try MLMultiArray(descriptor: descriptor, rawPtr: rawPtr)
modelInputs[name] = MLFeatureValue(multiArray: multiArray)
if let inputDescription = mlModel.modelDescription.inputDescriptionsByName[name],
inputDescription.type == .image,
let imageConstraint = inputDescription.imageConstraint {
let pixelBuffer = try createPixelBuffer(
descriptor: descriptor,
rawPtr: rawPtr,
imageConstraint: imageConstraint
)
modelInputs[name] = MLFeatureValue(pixelBuffer: pixelBuffer)
} else {
let multiArray = try MLMultiArray(descriptor: descriptor, rawPtr: rawPtr)
modelInputs[name] = MLFeatureValue(multiArray: multiArray)
}
}

let (modelOutputs, predictDuration) = try measure {
Expand All @@ -80,20 +244,31 @@ extension LoadedModel {

var segment = TensorStorage()
for name in modelOutputs.featureNames {
guard let multiArrayValue = modelOutputs.featureValue(for: name)?.multiArrayValue else {
throw Error.unsupportedOutput(details:"Incompatible output format detected for model ID '\(id)'.")
guard let featureValue = modelOutputs.featureValue(for: name) else {
throw Error.unsupportedOutput(details: "Missing output '\(name)' for model ID '\(id)'.")
}

multiArrayValue.withUnsafeBytes { ptr in
segment.size += ptr.count
buffer.append(contentsOf: ptr)
if let multiArrayValue = featureValue.multiArrayValue {
multiArrayValue.withUnsafeBytes { ptr in
segment.size += ptr.count
buffer.append(contentsOf: ptr)
}
let descriptor = TensorDescriptor(dataType: multiArrayValue.dataType.representation,
shape: multiArrayValue.shape.map { $0.intValue },
strides: multiArrayValue.strides.map { $0.intValue },
storage: segment)
outputs[name] = descriptor
segment.offset = segment.size
} else if let pixelBuffer = featureValue.imageBufferValue {
let (data, shape, strides) = try extractPixelBufferData(pixelBuffer)
segment.size += data.count
buffer.append(data)
let descriptor = TensorDescriptor(dataType: "UInt8", shape: shape, strides: strides, storage: segment)
outputs[name] = descriptor
segment.offset = segment.size
} else {
throw Error.unsupportedOutput(details: "Incompatible output format for '\(name)' in model ID '\(id)'.")
}
let descriptor = TensorDescriptor(dataType: multiArrayValue.dataType.representation,
shape: multiArrayValue.shape.map { $0.intValue },
strides: multiArrayValue.strides.map { $0.intValue },
storage: segment)
outputs[name] = descriptor
segment.offset = segment.size
}
}

Expand Down
9 changes: 9 additions & 0 deletions coremltools/models/ml_program/experimental/async_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@
if _HAS_TORCH:
from torch import Tensor

# Optional Pillow support: PIL images are accepted as predict() inputs and
# converted to numpy arrays when the library is installed.
_HAS_PIL = True
try:
    from PIL import Image as _PIL_IMAGE
except ImportError:
    # Catch only ImportError: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit raised during import.
    _HAS_PIL = False


class MLModelAsyncWrapper(ABC):
@staticmethod
def init_check(
Expand Down Expand Up @@ -386,6 +393,8 @@ async def predict(
def convert_to_np_array(input: Any):
if isinstance(input, np.ndarray):
return input
elif _HAS_PIL and isinstance(input, _PIL_IMAGE.Image):
return np.array(input)
elif _HAS_TORCH and isinstance(input, Tensor):
return input.detach().numpy()
else:
Expand Down
5 changes: 5 additions & 0 deletions coremltools/models/ml_program/experimental/remote_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,6 +1930,7 @@ class DataType(_Enum):
Float32 = "Float32"
Float64 = "Float64"
Int32 = "Int32"
UInt8 = "UInt8"

shape: _List[int]
strides: _List[int]
Expand All @@ -1948,6 +1949,8 @@ def _to_multi_array_dtype(
return _TensorDescriptor.DataType.Float64
elif dtype == _np.int32:
return _TensorDescriptor.DataType.Int32
elif dtype == _np.uint8:
return _TensorDescriptor.DataType.UInt8
else:
raise ValueError(f"{dtype} is not supported")

Expand All @@ -1963,6 +1966,8 @@ def _to_numpy_dtype(
return _np.float64
elif dtype == _TensorDescriptor.DataType.Int32:
return _np.int32
elif dtype == _TensorDescriptor.DataType.UInt8:
return _np.uint8
else:
raise ValueError(f"{dtype} is not supported")

Expand Down
22 changes: 22 additions & 0 deletions coremltools/test/ml_program/experimental/test_remote_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import coremltools as ct
from coremltools.converters.mil import Builder as mb

from coremltools.models._compiled_model import CompiledMLModel
from coremltools.models.ml_program.experimental.remote_device import (
Device,
Expand Down Expand Up @@ -412,3 +413,24 @@ def can_sudo_without_password():
f"STDOUT:\n{stdout}\n"
f"STDERR:\n{stderr}"
)


class TestTensorDescriptorUInt8:
    """Tests covering uint8 support in ``_TensorDescriptor``."""

    def test_uint8_dtype_conversion(self):
        # numpy dtype -> descriptor dtype, and the inverse mapping.
        descriptor_dtype = _TensorDescriptor._to_multi_array_dtype(np.dtype(np.uint8))
        assert descriptor_dtype == _TensorDescriptor.DataType.UInt8

        numpy_dtype = _TensorDescriptor._to_numpy_dtype(_TensorDescriptor.DataType.UInt8)
        assert numpy_dtype == np.uint8

    def test_uint8_tensor_descriptor_roundtrip(self):
        import io

        # Serialize a small uint8 array into an in-memory stream ...
        source = np.arange(1, 7, dtype=np.uint8).reshape(2, 3)
        stream = io.BytesIO()
        descriptor = _TensorDescriptor.from_array(source, stream)

        assert descriptor.data_type == _TensorDescriptor.DataType.UInt8
        assert descriptor.shape == [2, 3]

        # ... then read it back and verify dtype and contents survive.
        stream.seek(0)
        round_tripped = descriptor.to_array(stream)
        assert round_tripped.dtype == np.uint8
        assert np.array_equal(source, round_tripped)