From d183dd927b5e2fcf773b5bc685a185e8f79d409f Mon Sep 17 00:00:00 2001
From: Pierluigi Ferrari
Date: Fri, 19 Jan 2018 00:44:13 +0100
Subject: [PATCH] fix: Correct mean normalization channel order

The order of the mean normalization values was BGR when it should have
been RGB for this implementation. Fixed now.
---
 keras_ssd300.py              |  2 +-
 keras_ssd512.py              |  2 +-
 ssd300_evaluation_COCO.ipynb | 52 +++++++++++++++++-------------------
 ssd300_inference.ipynb       |  2 +-
 ssd300_training.ipynb        |  7 ++---
 ssd512_inference.ipynb       |  2 +-
 6 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/keras_ssd300.py b/keras_ssd300.py
index 8740a307..8988a0b1 100644
--- a/keras_ssd300.py
+++ b/keras_ssd300.py
@@ -45,7 +45,7 @@ def ssd_300(image_size,
             variances=[0.1, 0.1, 0.2, 0.2],
             coords='centroids',
             normalize_coords=False,
-            subtract_mean=[104, 117, 123],
+            subtract_mean=[123, 117, 104],
             divide_by_stddev=None,
             swap_channels=True,
             return_predictor_sizes=False):
diff --git a/keras_ssd512.py b/keras_ssd512.py
index f075da80..ae90a831 100644
--- a/keras_ssd512.py
+++ b/keras_ssd512.py
@@ -46,7 +46,7 @@ def ssd_512(image_size,
             variances=[0.1, 0.1, 0.2, 0.2],
             coords='centroids',
             normalize_coords=False,
-            subtract_mean=[104, 117, 123],
+            subtract_mean=[123, 117, 104],
             divide_by_stddev=None,
             swap_channels=True,
             return_predictor_sizes=False):
diff --git a/ssd300_evaluation_COCO.ipynb b/ssd300_evaluation_COCO.ipynb
index a9f17fb8..aa9ee199 100644
--- a/ssd300_evaluation_COCO.ipynb
+++ b/ssd300_evaluation_COCO.ipynb
@@ -18,9 +18,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from keras import backend as K\n",
@@ -83,9 +81,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# 1: Build the Keras model\n",
@@ -109,7 +105,7 @@
     "                variances=[0.1, 0.1, 0.2, 0.2],\n",
     "                coords='centroids',\n",
     "                normalize_coords=True,\n",
-    "                subtract_mean=[104, 117, 123],\n",
+    "                subtract_mean=[123, 117, 104],\n",
     "                swap_channels=True)\n",
     "\n",
     "# 2: Load the trained weights into the model.\n",
@@ -210,7 +206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
@@ -223,7 +219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Number of images in the evaluation dataset: 5000\n",
-       "Producing results file: 100%|██████████| 250/250 [15:02<00:00, 3.79s/it]\n",
+       "Producing results file: 100%|██████████| 250/250 [17:07<00:00, 4.29s/it]\n",
       "Prediction results saved in 'detections_val2017_ssd300_results.json'\n"
      ]
     }
@@ -263,7 +259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "loading annotations into memory...\n",
-       "Done (t=0.54s)\n",
+       "Done (t=0.41s)\n",
       "creating index...\n",
       "index created!\n",
       "Loading and preparing results...\n",
-       "DONE (t=5.32s)\n",
+       "DONE (t=5.34s)\n",
       "creating index...\n",
       "index created!\n"
      ]
     }
@@ -289,7 +285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -298,21 +294,21 @@
      "text": [
       "Running per image evaluation...\n",
       "Evaluate annotation type *bbox*\n",
-       "DONE (t=68.27s).\n",
+       "DONE (t=69.19s).\n",
       "Accumulating evaluation results...\n",
-       "DONE (t=10.48s).\n",
-       " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.241\n",
-       " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.415\n",
-       " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.247\n",
-       " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.057\n",
-       " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.257\n",
-       " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.405\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.229\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.335\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.356\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.097\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.395\n",
-       " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.568\n"
+       "DONE (t=14.10s).\n",
+       " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.247\n",
+       " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.424\n",
+       " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.253\n",
+       " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059\n",
+       " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.264\n",
+       " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.414\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.232\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.341\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.102\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.401\n",
+       " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577\n"
      ]
     }
    ],
diff --git a/ssd300_inference.ipynb b/ssd300_inference.ipynb
index fc6b100d..9ba95da1 100644
--- a/ssd300_inference.ipynb
+++ b/ssd300_inference.ipynb
@@ -95,7 +95,7 @@
     "                variances=[0.1, 0.1, 0.2, 0.2],\n",
     "                coords='centroids',\n",
     "                normalize_coords=True,\n",
-    "                subtract_mean=[104, 117, 123],\n",
+    "                subtract_mean=[123, 117, 104],\n",
     "                swap_channels=True)\n",
     "\n",
     "# 2: Load the trained weights into the model.\n",
diff --git a/ssd300_training.ipynb b/ssd300_training.ipynb
index 90f31ad1..94a8ca5a 100644
--- a/ssd300_training.ipynb
+++ b/ssd300_training.ipynb
@@ -76,10 +76,11 @@
     "img_height = 300 # Height of the input images\n",
     "img_width = 300 # Width of the input images\n",
     "img_channels = 3 # Number of color channels of the input images\n",
-    "subtract_mean = [104, 117, 123] # The per-channel mean of the images in the dataset\n",
+    "subtract_mean = [123, 117, 104] # The per-channel mean of the images in the dataset\n",
     "swap_channels = True # The color channel order in the original SSD is BGR\n",
     "n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO\n",
-    "scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets, the factors for the MS COCO dataset are smaller, namely [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]\n",
+    "scales_voc = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets\n",
+    "scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets\n",
     "aspect_ratios = [[1.0, 2.0, 0.5],\n",
     "                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
     "                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
@@ -137,7 +138,7 @@
     "model = ssd_300(image_size=(img_height, img_width, img_channels),\n",
     "                n_classes=n_classes,\n",
     "                l2_regularization=0.0005,\n",
-    "                scales=scales,\n",
+    "                scales=scales_voc,\n",
     "                aspect_ratios_per_layer=aspect_ratios,\n",
     "                two_boxes_for_ar1=two_boxes_for_ar1,\n",
     "                steps=steps,\n",
diff --git a/ssd512_inference.ipynb b/ssd512_inference.ipynb
index 4a53c66b..741b6b7e 100644
--- a/ssd512_inference.ipynb
+++ b/ssd512_inference.ipynb
@@ -96,7 +96,7 @@
     "                variances=[0.1, 0.1, 0.2, 0.2],\n",
     "                coords='centroids',\n",
     "                normalize_coords=True,\n",
-    "                subtract_mean=[104, 117, 123],\n",
+    "                subtract_mean=[123, 117, 104],\n",
     "                swap_channels=True)\n",
     "\n",
     "# 2: Load the trained weights into the model.\n",
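
Why RGB order is the fix: the model builders in keras_ssd300.py and
keras_ssd512.py subtract `subtract_mean` from the input tensor first and
only afterwards apply the optional BGR channel swap (`swap_channels=True`),
so the mean must be given in the RGB order of the incoming images. Below is
a minimal NumPy sketch of the failure mode; it is not part of the patch,
and the pixel values are illustrative only.

import numpy as np

# A toy pixel in RGB order, as produced by most Python image loaders.
rgb_pixel = np.array([255.0, 128.0, 64.0])   # [R, G, B]

# Correct: the mean is in RGB order, matching the input tensor. The
# channel swap to BGR happens only after this subtraction.
mean_rgb = np.array([123.0, 117.0, 104.0])
centered = rgb_pixel - mean_rgb              # each channel gets its own mean
bgr = centered[::-1]                         # swap to BGR afterwards

# Old behavior: a BGR-ordered mean subtracted from an RGB tensor crosses
# the channels, shifting R by the B mean and B by the R mean.
mean_bgr = np.array([104.0, 117.0, 123.0])
miscentered = rgb_pixel - mean_bgr           # R off by +19, B off by -19

Note that the mismatch only affects the R and B channels, each by
123 - 104 = 19 units, which fits the modest rather than dramatic AP/AR
improvements in the updated COCO evaluation output above.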