diff --git a/export.py b/export.py index 73f4bb8c0870..5f8e1c4821da 100644 --- a/export.py +++ b/export.py @@ -77,6 +77,25 @@ MACOS = platform.system() == 'Darwin' # macOS environment +class iOSModel(torch.nn.Module): + + def __init__(self, model, im): + super().__init__() + b, c, h, w = im.shape # batch, channel, height, width + self.model = model + self.nc = model.nc # number of classes + if w == h: + self.normalize = 1. / w + else: + self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]) # broadcast (slower, smaller) + # np = model(im)[0].shape[1] # number of points + # self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]).expand(np, 4) # explicit (faster, larger) + + def forward(self, x): + xywh, conf, cls = self.model(x)[0].squeeze().split((4, 1, self.nc), 1) + return cls * conf, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4) + + def export_formats(): # YOLOv5 export formats x = [ @@ -223,7 +242,7 @@ def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')): @try_export -def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')): +def export_coreml(model, im, file, int8, half, nms, prefix=colorstr('CoreML:')): # YOLOv5 CoreML export check_requirements('coremltools') import coremltools as ct @@ -231,6 +250,8 @@ def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')): LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...') f = file.with_suffix('.mlmodel') + if nms: + model = iOSModel(model, im) ts = torch.jit.trace(model, im, strict=False) # TorchScript model ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])]) bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None) @@ -506,6 +527,129 @@ def add_tflite_metadata(file, metadata, num_outputs): tmp_file.unlink() +def pipeline_coreml(model, im, file, names, y, prefix=colorstr('CoreML Pipeline:')): + # YOLOv5 CoreML pipeline + import coremltools as ct + from PIL import Image + + print(f'{prefix} starting pipeline with coremltools {ct.__version__}...') + batch_size, ch, h, w = list(im.shape) # BCHW + t = time.time() + + # Output shapes + spec = model.get_spec() + out0, out1 = iter(spec.description.output) + if platform.system() == 'Darwin': + img = Image.new('RGB', (w, h)) # img(192 width, 320 height) + # img = torch.zeros((*opt.img_size, 3)).numpy() # img size(320,192,3) iDetection + out = model.predict({'image': img}) + out0_shape, out1_shape = out[out0.name].shape, out[out1.name].shape + else: # linux and windows can not run model.predict(), get sizes from pytorch output y + s = tuple(y[0].shape) + out0_shape, out1_shape = (s[1], s[2] - 5), (s[1], 4) # (3780, 80), (3780, 4) + + # Checks + nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height + na, nc = out0_shape + # na, nc = out0.type.multiArrayType.shape # number anchors, classes + assert len(names) == nc, f'{len(names)} names found for nc={nc}' # check + + # Define output shapes (missing) + out0.type.multiArrayType.shape[:] = out0_shape # (3780, 80) + out1.type.multiArrayType.shape[:] = out1_shape # (3780, 4) + # spec.neuralNetwork.preprocessing[0].featureName = '0' + + # Flexible input shapes + # from coremltools.models.neural_network import flexible_shape_utils + # s = [] # shapes + # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192)) + # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384)) # (height, width) + # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s) + # r = flexible_shape_utils.NeuralNetworkImageSizeRange() # shape ranges + # r.add_height_range((192, 640)) + # r.add_width_range((192, 640)) + # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r) + + # Print + print(spec.description) + + # Model from spec + model = ct.models.MLModel(spec) + + # 3. Create NMS protobuf + nms_spec = ct.proto.Model_pb2.Model() + nms_spec.specificationVersion = 5 + for i in range(2): + decoder_output = model._spec.description.output[i].SerializeToString() + nms_spec.description.input.add() + nms_spec.description.input[i].ParseFromString(decoder_output) + nms_spec.description.output.add() + nms_spec.description.output[i].ParseFromString(decoder_output) + + nms_spec.description.output[0].name = 'confidence' + nms_spec.description.output[1].name = 'coordinates' + + output_sizes = [nc, 4] + for i in range(2): + ma_type = nms_spec.description.output[i].type.multiArrayType + ma_type.shapeRange.sizeRanges.add() + ma_type.shapeRange.sizeRanges[0].lowerBound = 0 + ma_type.shapeRange.sizeRanges[0].upperBound = -1 + ma_type.shapeRange.sizeRanges.add() + ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i] + ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i] + del ma_type.shape[:] + + nms = nms_spec.nonMaximumSuppression + nms.confidenceInputFeatureName = out0.name # 1x507x80 + nms.coordinatesInputFeatureName = out1.name # 1x507x4 + nms.confidenceOutputFeatureName = 'confidence' + nms.coordinatesOutputFeatureName = 'coordinates' + nms.iouThresholdInputFeatureName = 'iouThreshold' + nms.confidenceThresholdInputFeatureName = 'confidenceThreshold' + nms.iouThreshold = 0.45 + nms.confidenceThreshold = 0.25 + nms.pickTop.perClass = True + nms.stringClassLabels.vector.extend(names.values()) + nms_model = ct.models.MLModel(nms_spec) + + # 4. Pipeline models together + pipeline = ct.models.pipeline.Pipeline(input_features=[('image', ct.models.datatypes.Array(3, ny, nx)), + ('iouThreshold', ct.models.datatypes.Double()), + ('confidenceThreshold', ct.models.datatypes.Double())], + output_features=['confidence', 'coordinates']) + pipeline.add_model(model) + pipeline.add_model(nms_model) + + # Correct datatypes + pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString()) + pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString()) + pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString()) + + # Update metadata + pipeline.spec.specificationVersion = 5 + pipeline.spec.description.metadata.versionString = 'https://github.com/ultralytics/yolov5' + pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov5' + pipeline.spec.description.metadata.author = 'glenn.jocher@ultralytics.com' + pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov5/blob/master/LICENSE' + pipeline.spec.description.metadata.userDefined.update({ + 'classes': ','.join(names.values()), + 'iou_threshold': str(nms.iouThreshold), + 'confidence_threshold': str(nms.confidenceThreshold)}) + + # Save the model + f = file.with_suffix('.mlmodel') # filename + model = ct.models.MLModel(pipeline.spec) + model.input_description['image'] = 'Input image' + model.input_description['iouThreshold'] = f'(optional) IOU Threshold override (default: {nms.iouThreshold})' + model.input_description['confidenceThreshold'] = \ + f'(optional) Confidence Threshold override (default: {nms.confidenceThreshold})' + model.output_description['confidence'] = 'Boxes × Class confidence (see user-defined metadata "classes")' + model.output_description['coordinates'] = 'Boxes × [x, y, width, height] (relative to image size)' + model.save(f) # pipelined + print(f'{prefix} pipeline success ({time.time() - t:.2f}s), saved as {f} ({file_size(f):.1f} MB)') + + @smart_inference_mode() def run( data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path' @@ -584,7 +728,9 @@ def run( if xml: # OpenVINO f[3], _ = export_openvino(file, metadata, half) if coreml: # CoreML - f[4], _ = export_coreml(model, im, file, int8, half) + f[4], ct_model = export_coreml(model, im, file, int8, half, nms) + if nms: + pipeline_coreml(ct_model, im, file, model.names, y) if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.' assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'