Commit

chore: Remove internal coordinate format options
The difference between the internal coordinate format that the model learns and the coordinate format of the input and output data has recently caused some confusion and led to a number of questions, so the option to change the internal coordinate format is now omitted entirely from the tutorial notebooks. Changing the internal coordinate format is never relevant in practice and only led to confusion, so it is best not to confront users with this option in the first place.
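To illustrate the distinction the message draws (based on the explanation in the markdown cell removed from ssd7_training.ipynb further down): the model learns to predict boxes in the 'centroids' format (cx, cy, w, h) internally, but the decoding stage converts the predictions to the 'corners' format (xmin, ymin, xmax, ymax), which is what the model ultimately outputs. A minimal NumPy sketch of that conversion (not the repository's own utility function):

import numpy as np

def centroids_to_corners(boxes):
    # Convert an array of boxes of shape (n, 4) from the 'centroids'
    # format (cx, cy, w, h) to the 'corners' format (xmin, ymin, xmax, ymax).
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    return np.stack([cx - w / 2.0,   # xmin
                     cy - h / 2.0,   # ymin
                     cx + w / 2.0,   # xmax
                     cy + h / 2.0],  # ymax
                    axis=-1)

print(centroids_to_corners(np.array([[50.0, 40.0, 20.0, 10.0]])))
# [[40. 35. 60. 45.]]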
pierluigiferrari committed Mar 29, 2018
1 parent 46e7f58 commit 9006bf6
Showing 7 changed files with 26 additions and 54 deletions.
8 changes: 4 additions & 4 deletions ssd300_evaluation_COCO.ipynb
@@ -18,7 +18,9 @@
{
"cell_type": "code",
"execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"from keras import backend as K\n",
@@ -108,7 +110,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -186,7 +187,7 @@
},
"outputs": [],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the dataset here.\n",
"MS_COCO_dataset_images_dir = '../../datasets/MicrosoftCOCO/val2017/'\n",
@@ -254,7 +255,6 @@
" confidence_thresh=0.01,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" pred_coords='centroids',\n",
" normalize_coords=True)"
]
},
12 changes: 7 additions & 5 deletions ssd300_evaluation_Pascal_VOC.ipynb
@@ -14,7 +14,9 @@
{
"cell_type": "code",
"execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"from keras import backend as K\n",
@@ -70,7 +72,9 @@
{
"cell_type": "code",
"execution_count": 4,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"# 1: Build the Keras model\n",
@@ -93,7 +97,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -177,7 +180,7 @@
}
],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the dataset here.\n",
"Pascal_VOC_dataset_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'\n",
@@ -253,7 +256,6 @@
" confidence_thresh=0.01,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" pred_coords='centroids',\n",
" normalize_coords=True)"
]
},
3 changes: 1 addition & 2 deletions ssd300_inference.ipynb
@@ -101,7 +101,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -324,7 +323,7 @@
"source": [
"# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
-"dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
23 changes: 5 additions & 18 deletions ssd300_training.ipynb
@@ -96,7 +96,6 @@
"offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
"clip_boxes = False # Whether or not to clip ground truth and anchor boxes to lie entirely within the image boundaries\n",
"variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation\n",
-"coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
"normalize_coords = True"
]
},
@@ -150,7 +149,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=mean_color,\n",
" swap_channels=swap_channels)\n",
@@ -226,20 +224,6 @@
"In order to train the model on a dataset other than Pascal VOC, either choose `DataGenerator`'s appropriate parser method that corresponds to your data format, or, if `DataGenerator` does not provide a suitable parser for your data format, you can write an additional parser and add it. Out of the box, `DataGenerator` can handle datasets that use the Pascal VOC format (use `parse_xml()`), the MS COCO format (use `parse_json()`) and a wide range of CSV formats (use `parse_csv()`)."
]
},
-{
-"cell_type": "code",
-"execution_count": 11,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
-"\n",
-"train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
-"val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
-]
-},
{
"cell_type": "code",
"execution_count": 12,
@@ -256,6 +240,11 @@
}
],
"source": [
+"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+"\n",
+"train_dataset = DataGenerator()\n",
+"val_dataset = DataGenerator()\n",
+"\n",
"# 2: Parse the image and label lists for the training and validation datasets. This can take a while.\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
@@ -360,7 +349,6 @@
" variances=variances,\n",
" pos_iou_threshold=0.5,\n",
" neg_iou_limit=0.4,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords)\n",
"\n",
"# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -578,7 +566,6 @@
" confidence_thresh=0.5,\n",
" iou_threshold=0.4,\n",
" top_k=200,\n",
-" input_coords='centroids',\n",
" normalize_coords=normalize_coords,\n",
" img_height=img_height,\n",
" img_width=img_width)"
3 changes: 1 addition & 2 deletions ssd512_inference.ipynb
@@ -102,7 +102,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -325,7 +324,7 @@
"source": [
"# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
-"dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
27 changes: 7 additions & 20 deletions ssd7_training.ipynb
@@ -71,7 +71,6 @@
"* If `two_boxes_for_ar1 == True`, then each predictor layer will predict two boxes with aspect ratio one, one a bit smaller, the other one a bit larger.\n",
"* If `clip_boxes == True`, then the ground truth and anchor boxes will be clipped so that they lie entirely within the image boundaries. Even though it may seem counterintuitive at first, it is recommended not to clip the boxes. According to Wei Liu, the model performs slightly better when the boxes are not clipped.\n",
"* In the matching process during the training, the anchor box offsets are being divided by the variances. Leaving them at 1.0 for each of the four box coordinates means that they have no effect. Setting them to less than 1.0 spreads the imagined anchor box offset distribution for the respective box coordinate.\n",
-"* The `coords` argument lets you choose what coordinate format the model should learn internally. If you choose the 'centroids' format, the targets will be converted to the `(cx, cy, w, h)` coordinate format used in the original implementation. Note that the coordinate format that the model learns to predict and the coordinate format that the model outputs are not necessarily the same. For instance, the original SSD300 learns to predict `(cx, cy, w, h)` internally, but in the decoding stage, those predictions are converted to `(xmin, ymin, xmax, ymax)`, so that is what the model outputs at the end.\n",
"* `normalize_coords` converts all coordinates from absolute coordinate to coordinates that are relative to the image height and width. This setting has no effect on the outcome of the training."
]
},
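A side note on the variances bullet in the hunk above: in the 'centroids' encoding, the offsets between a matched ground truth box and its anchor are divided by the variances, so values below 1.0 scale the regression targets up. A minimal NumPy sketch of that encoding, following the original SSD formulas rather than the repository's own encoder code:

import numpy as np

def encode_centroids(gt, anchor, variances):
    # Encode a ground truth box against an anchor box, both given in the
    # 'centroids' format (cx, cy, w, h). Dividing by variances < 1.0
    # spreads the distribution of the targets the model has to learn.
    cx_gt, cy_gt, w_gt, h_gt = gt
    cx_a, cy_a, w_a, h_a = anchor
    var_cx, var_cy, var_w, var_h = variances
    return np.array([(cx_gt - cx_a) / w_a / var_cx,
                     (cy_gt - cy_a) / h_a / var_cy,
                     np.log(w_gt / w_a) / var_w,
                     np.log(h_gt / h_a) / var_h])

gt = (55.0, 40.0, 22.0, 12.0)
anchor = (50.0, 40.0, 20.0, 10.0)
print(encode_centroids(gt, anchor, [1.0, 1.0, 1.0, 1.0]))  # SSD7 default: no effect
print(encode_centroids(gt, anchor, [0.1, 0.1, 0.2, 0.2]))  # SSD300 values: targets scaled up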
@@ -96,7 +95,6 @@
"offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended\n",
"clip_boxes = False # Whether or not to clip the ground truth and anchor boxes to lie entirely within the image boundaries\n",
"variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled\n",
-"coords = 'centroids' # Whether the box coordinates to be used should be in the 'centroids' or 'minmax' format, see documentation\n",
"normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size"
]
},
@@ -149,7 +147,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=intensity_mean,\n",
" divide_by_stddev=intensity_range)\n",
@@ -226,20 +223,6 @@
"The example setup below was used to train SSD7 on two road traffic datasets released by [Udacity](https://github.com/udacity/self-driving-car/tree/master/annotations) with around 20,000 images in total and 5 object classes (car, truck, pedestrian, bicyclist, traffic light), although the vast majority of the objects are cars. The original datasets have a constant image size of 1200x1920 RGB. I consolidated the two datasets, removed a few bad samples (although there are probably many more), and resized the images to 300x480 RGB, i.e. to one sixteenth of the original image size. In case you'd like to train a model on the same dataset, you can download the consolidated and resized dataset I used [here](https://drive.google.com/open?id=1uOqIUiJlDwoeL8vnNMacNbkDpDe1eRp-) (about 900 MB)."
]
},
-{
-"cell_type": "code",
-"execution_count": 4,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
-"\n",
-"train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
-"val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
-]
-},
{
"cell_type": "code",
"execution_count": 5,
@@ -255,6 +238,11 @@
}
],
"source": [
+"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+"\n",
+"train_dataset = DataGenerator()\n",
+"val_dataset = DataGenerator()\n",
+"\n",
"# 2: Parse the image and label lists for the training and validation datasets.\n",
"\n",
"# TODO: Set the paths to your dataset here.\n",
@@ -334,7 +322,6 @@
" variances=variances,\n",
" pos_iou_threshold=0.5,\n",
" neg_iou_limit=0.3,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords)\n",
"\n",
"# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -410,6 +397,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
+"collapsed": true,
"scrolled": true
},
"outputs": [],
@@ -593,8 +581,7 @@
" confidence_thresh=0.5,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" input_coords='centroids',\n",
-" normalize_coords=True,\n",
+" normalize_coords=normalize_coords,\n",
" img_height=img_height,\n",
" img_width=img_width)\n",
"\n",
4 changes: 1 addition & 3 deletions weight_sampling_tutorial.ipynb
@@ -472,7 +472,6 @@
"offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
"clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries\n",
"variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are scaled as in the original implementation\n",
-"coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
"normalize_coords = True"
]
},
@@ -508,7 +507,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=subtract_mean,\n",
" divide_by_stddev=None,\n",
@@ -562,7 +560,7 @@
}
],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to your dataset here.\n",
"images_path = '../../datasets/Udacity_Driving/driving_dataset_consolidated_small/'\n",
