Skip to content

Commit

Permalink
Add NMS to CoreML exports (ultralytics#11361)
Browse files Browse the repository at this point in the history
* Add NMS to CoreML exports

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
glenn-jocher and pre-commit-ci[bot] authored Apr 14, 2023
1 parent 60e29e2 commit a66fa83
Showing 1 changed file with 148 additions and 2 deletions.
150 changes: 148 additions & 2 deletions export.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,25 @@
MACOS = platform.system() == 'Darwin' # macOS environment


class iOSModel(torch.nn.Module):

def __init__(self, model, im):
super().__init__()
b, c, h, w = im.shape # batch, channel, height, width
self.model = model
self.nc = model.nc # number of classes
if w == h:
self.normalize = 1. / w
else:
self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]) # broadcast (slower, smaller)
# np = model(im)[0].shape[1] # number of points
# self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]).expand(np, 4) # explicit (faster, larger)

def forward(self, x):
xywh, conf, cls = self.model(x)[0].squeeze().split((4, 1, self.nc), 1)
return cls * conf, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)


def export_formats():
# YOLOv5 export formats
x = [
Expand Down Expand Up @@ -223,14 +242,16 @@ def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):


@try_export
def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
def export_coreml(model, im, file, int8, half, nms, prefix=colorstr('CoreML:')):
# YOLOv5 CoreML export
check_requirements('coremltools')
import coremltools as ct

LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
f = file.with_suffix('.mlmodel')

if nms:
model = iOSModel(model, im)
ts = torch.jit.trace(model, im, strict=False) # TorchScript model
ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
Expand Down Expand Up @@ -506,6 +527,129 @@ def add_tflite_metadata(file, metadata, num_outputs):
tmp_file.unlink()


def pipeline_coreml(model, im, file, names, y, prefix=colorstr('CoreML Pipeline:')):
# YOLOv5 CoreML pipeline
import coremltools as ct
from PIL import Image

print(f'{prefix} starting pipeline with coremltools {ct.__version__}...')
batch_size, ch, h, w = list(im.shape) # BCHW
t = time.time()

# Output shapes
spec = model.get_spec()
out0, out1 = iter(spec.description.output)
if platform.system() == 'Darwin':
img = Image.new('RGB', (w, h)) # img(192 width, 320 height)
# img = torch.zeros((*opt.img_size, 3)).numpy() # img size(320,192,3) iDetection
out = model.predict({'image': img})
out0_shape, out1_shape = out[out0.name].shape, out[out1.name].shape
else: # linux and windows can not run model.predict(), get sizes from pytorch output y
s = tuple(y[0].shape)
out0_shape, out1_shape = (s[1], s[2] - 5), (s[1], 4) # (3780, 80), (3780, 4)

# Checks
nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
na, nc = out0_shape
# na, nc = out0.type.multiArrayType.shape # number anchors, classes
assert len(names) == nc, f'{len(names)} names found for nc={nc}' # check

# Define output shapes (missing)
out0.type.multiArrayType.shape[:] = out0_shape # (3780, 80)
out1.type.multiArrayType.shape[:] = out1_shape # (3780, 4)
# spec.neuralNetwork.preprocessing[0].featureName = '0'

# Flexible input shapes
# from coremltools.models.neural_network import flexible_shape_utils
# s = [] # shapes
# s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192))
# s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384)) # (height, width)
# flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s)
# r = flexible_shape_utils.NeuralNetworkImageSizeRange() # shape ranges
# r.add_height_range((192, 640))
# r.add_width_range((192, 640))
# flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r)

# Print
print(spec.description)

# Model from spec
model = ct.models.MLModel(spec)

# 3. Create NMS protobuf
nms_spec = ct.proto.Model_pb2.Model()
nms_spec.specificationVersion = 5
for i in range(2):
decoder_output = model._spec.description.output[i].SerializeToString()
nms_spec.description.input.add()
nms_spec.description.input[i].ParseFromString(decoder_output)
nms_spec.description.output.add()
nms_spec.description.output[i].ParseFromString(decoder_output)

nms_spec.description.output[0].name = 'confidence'
nms_spec.description.output[1].name = 'coordinates'

output_sizes = [nc, 4]
for i in range(2):
ma_type = nms_spec.description.output[i].type.multiArrayType
ma_type.shapeRange.sizeRanges.add()
ma_type.shapeRange.sizeRanges[0].lowerBound = 0
ma_type.shapeRange.sizeRanges[0].upperBound = -1
ma_type.shapeRange.sizeRanges.add()
ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
del ma_type.shape[:]

nms = nms_spec.nonMaximumSuppression
nms.confidenceInputFeatureName = out0.name # 1x507x80
nms.coordinatesInputFeatureName = out1.name # 1x507x4
nms.confidenceOutputFeatureName = 'confidence'
nms.coordinatesOutputFeatureName = 'coordinates'
nms.iouThresholdInputFeatureName = 'iouThreshold'
nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
nms.iouThreshold = 0.45
nms.confidenceThreshold = 0.25
nms.pickTop.perClass = True
nms.stringClassLabels.vector.extend(names.values())
nms_model = ct.models.MLModel(nms_spec)

# 4. Pipeline models together
pipeline = ct.models.pipeline.Pipeline(input_features=[('image', ct.models.datatypes.Array(3, ny, nx)),
('iouThreshold', ct.models.datatypes.Double()),
('confidenceThreshold', ct.models.datatypes.Double())],
output_features=['confidence', 'coordinates'])
pipeline.add_model(model)
pipeline.add_model(nms_model)

# Correct datatypes
pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString())
pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())

# Update metadata
pipeline.spec.specificationVersion = 5
pipeline.spec.description.metadata.versionString = 'https://github.com/ultralytics/yolov5'
pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov5'
pipeline.spec.description.metadata.author = 'glenn.jocher@ultralytics.com'
pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov5/blob/master/LICENSE'
pipeline.spec.description.metadata.userDefined.update({
'classes': ','.join(names.values()),
'iou_threshold': str(nms.iouThreshold),
'confidence_threshold': str(nms.confidenceThreshold)})

# Save the model
f = file.with_suffix('.mlmodel') # filename
model = ct.models.MLModel(pipeline.spec)
model.input_description['image'] = 'Input image'
model.input_description['iouThreshold'] = f'(optional) IOU Threshold override (default: {nms.iouThreshold})'
model.input_description['confidenceThreshold'] = \
f'(optional) Confidence Threshold override (default: {nms.confidenceThreshold})'
model.output_description['confidence'] = 'Boxes × Class confidence (see user-defined metadata "classes")'
model.output_description['coordinates'] = 'Boxes × [x, y, width, height] (relative to image size)'
model.save(f) # pipelined
print(f'{prefix} pipeline success ({time.time() - t:.2f}s), saved as {f} ({file_size(f):.1f} MB)')


@smart_inference_mode()
def run(
data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path'
Expand Down Expand Up @@ -584,7 +728,9 @@ def run(
if xml: # OpenVINO
f[3], _ = export_openvino(file, metadata, half)
if coreml: # CoreML
f[4], _ = export_coreml(model, im, file, int8, half)
f[4], ct_model = export_coreml(model, im, file, int8, half, nms)
if nms:
pipeline_coreml(ct_model, im, file, model.names, y)
if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'
Expand Down

0 comments on commit a66fa83

Please sign in to comment.