Commit

chore: Remove internal coordinate format options
The difference between the internal coordinate format that the model learns and the coordinate format of the input and output data has recently caused some confusion and led to a number of questions, so the option to change the internal coordinate format is now omitted entirely from the tutorial notebooks. Changing the internal coordinate format is never relevant in practice and only led to confusion, so it is best not to confront users with this option in the first place.
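To illustrate the distinction the message draws (based on the explanation in the markdown cell removed from ssd7_training.ipynb further down): the model learns to predict boxes in the 'centroids' format (cx, cy, w, h) internally, but the decoding stage converts the predictions to the 'corners' format (xmin, ymin, xmax, ymax), which is what the model ultimately outputs. A minimal NumPy sketch of that conversion (not the repository's own utility function):

import numpy as np

def centroids_to_corners(boxes):
    # Convert an array of boxes of shape (n, 4) from the 'centroids'
    # format (cx, cy, w, h) to the 'corners' format (xmin, ymin, xmax, ymax).
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    return np.stack([cx - w / 2.0,   # xmin
                     cy - h / 2.0,   # ymin
                     cx + w / 2.0,   # xmax
                     cy + h / 2.0],  # ymax
                    axis=-1)

print(centroids_to_corners(np.array([[50.0, 40.0, 20.0, 10.0]])))
# [[40. 35. 60. 45.]]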
pierluigiferrari committed Mar 29, 2018
1 parent 46e7f58 commit 9006bf6
Showing 7 changed files with 26 additions and 54 deletions.
8 changes: 4 additions & 4 deletions ssd300_evaluation_COCO.ipynb
@@ -18,7 +18,9 @@
{
"cell_type": "code",
"execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"from keras import backend as K\n",
@@ -108,7 +110,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -186,7 +187,7 @@
},
"outputs": [],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the dataset here.\n",
"MS_COCO_dataset_images_dir = '../../datasets/MicrosoftCOCO/val2017/'\n",
@@ -254,7 +255,6 @@
" confidence_thresh=0.01,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" pred_coords='centroids',\n",
" normalize_coords=True)"
]
},
12 changes: 7 additions & 5 deletions ssd300_evaluation_Pascal_VOC.ipynb
@@ -14,7 +14,9 @@
{
"cell_type": "code",
"execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"from keras import backend as K\n",
@@ -70,7 +72,9 @@
{
"cell_type": "code",
"execution_count": 4,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
"outputs": [],
"source": [
"# 1: Build the Keras model\n",
@@ -93,7 +97,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -177,7 +180,7 @@
}
],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the dataset here.\n",
"Pascal_VOC_dataset_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'\n",
@@ -253,7 +256,6 @@
" confidence_thresh=0.01,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" pred_coords='centroids',\n",
" normalize_coords=True)"
]
},
3 changes: 1 addition & 2 deletions ssd300_inference.ipynb
@@ -101,7 +101,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -324,7 +323,7 @@
"source": [
"# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
-"dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
23 changes: 5 additions & 18 deletions ssd300_training.ipynb
@@ -96,7 +96,6 @@
"offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
"clip_boxes = False # Whether or not to clip ground truth and anchor boxes to lie entirely within the image boundaries\n",
"variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation\n",
-"coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
"normalize_coords = True"
]
},
@@ -150,7 +149,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=mean_color,\n",
" swap_channels=swap_channels)\n",
@@ -226,20 +224,6 @@
"In order to train the model on a dataset other than Pascal VOC, either choose `DataGenerator`'s appropriate parser method that corresponds to your data format, or, if `DataGenerator` does not provide a suitable parser for your data format, you can write an additional parser and add it. Out of the box, `DataGenerator` can handle datasets that use the Pascal VOC format (use `parse_xml()`), the MS COCO format (use `parse_json()`) and a wide range of CSV formats (use `parse_csv()`)."
]
},
-{
-"cell_type": "code",
-"execution_count": 11,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
-"\n",
-"train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
-"val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
-]
-},
{
"cell_type": "code",
"execution_count": 12,
@@ -256,6 +240,11 @@
}
],
"source": [
+"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+"\n",
+"train_dataset = DataGenerator()\n",
+"val_dataset = DataGenerator()\n",
+"\n",
"# 2: Parse the image and label lists for the training and validation datasets. This can take a while.\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
@@ -360,7 +349,6 @@
" variances=variances,\n",
" pos_iou_threshold=0.5,\n",
" neg_iou_limit=0.4,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords)\n",
"\n",
"# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -578,7 +566,6 @@
" confidence_thresh=0.5,\n",
" iou_threshold=0.4,\n",
" top_k=200,\n",
-" input_coords='centroids',\n",
" normalize_coords=normalize_coords,\n",
" img_height=img_height,\n",
" img_width=img_width)"
3 changes: 1 addition & 2 deletions ssd512_inference.ipynb
@@ -102,7 +102,6 @@
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
-" coords='centroids',\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
@@ -325,7 +324,7 @@
"source": [
"# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
-"dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
27 changes: 7 additions & 20 deletions ssd7_training.ipynb
@@ -71,7 +71,6 @@
"* If `two_boxes_for_ar1 == True`, then each predictor layer will predict two boxes with aspect ratio one, one a bit smaller, the other one a bit larger.\n",
"* If `clip_boxes == True`, then the ground truth and anchor boxes will be clipped so that they lie entirely within the image boundaries. Even though it may seem counterintuitive at first, it is recommended not to clip the boxes. According to Wei Liu, the model performs slightly better when the boxes are not clipped.\n",
"* In the matching process during the training, the anchor box offsets are being divided by the variances. Leaving them at 1.0 for each of the four box coordinates means that they have no effect. Setting them to less than 1.0 spreads the imagined anchor box offset distribution for the respective box coordinate.\n",
-"* The `coords` argument lets you choose what coordinate format the model should learn internally. If you choose the 'centroids' format, the targets will be converted to the `(cx, cy, w, h)` coordinate format used in the original implementation. Note that the coordinate format that the model learns to predict and the coordinate format that the model outputs are not necessarily the same. For instance, the original SSD300 learns to predict `(cx, cy, w, h)` internally, but in the decoding stage, those predictions are converted to `(xmin, ymin, xmax, ymax)`, so that is what the model outputs at the end.\n",
"* `normalize_coords` converts all coordinates from absolute coordinate to coordinates that are relative to the image height and width. This setting has no effect on the outcome of the training."
]
},
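A side note on the variances bullet in the hunk above: in the 'centroids' encoding, the offsets between a matched ground truth box and its anchor are divided by the variances, so values below 1.0 scale the regression targets up. A minimal NumPy sketch of that encoding, following the original SSD formulas rather than the repository's own encoder code:

import numpy as np

def encode_centroids(gt, anchor, variances):
    # Encode a ground truth box against an anchor box, both given in the
    # 'centroids' format (cx, cy, w, h). Dividing by variances < 1.0
    # spreads the distribution of the targets the model has to learn.
    cx_gt, cy_gt, w_gt, h_gt = gt
    cx_a, cy_a, w_a, h_a = anchor
    var_cx, var_cy, var_w, var_h = variances
    return np.array([(cx_gt - cx_a) / w_a / var_cx,
                     (cy_gt - cy_a) / h_a / var_cy,
                     np.log(w_gt / w_a) / var_w,
                     np.log(h_gt / h_a) / var_h])

gt = (55.0, 40.0, 22.0, 12.0)
anchor = (50.0, 40.0, 20.0, 10.0)
print(encode_centroids(gt, anchor, [1.0, 1.0, 1.0, 1.0]))  # SSD7 default: no effect
print(encode_centroids(gt, anchor, [0.1, 0.1, 0.2, 0.2]))  # SSD300 values: targets scaled up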
@@ -96,7 +95,6 @@
"offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended\n",
"clip_boxes = False # Whether or not to clip the ground truth and anchor boxes to lie entirely within the image boundaries\n",
"variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled\n",
-"coords = 'centroids' # Whether the box coordinates to be used should be in the 'centroids' or 'minmax' format, see documentation\n",
"normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size"
]
},
@@ -149,7 +147,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=intensity_mean,\n",
" divide_by_stddev=intensity_range)\n",
@@ -226,20 +223,6 @@
"The example setup below was used to train SSD7 on two road traffic datasets released by [Udacity](https://github.com/udacity/self-driving-car/tree/master/annotations) with around 20,000 images in total and 5 object classes (car, truck, pedestrian, bicyclist, traffic light), although the vast majority of the objects are cars. The original datasets have a constant image size of 1200x1920 RGB. I consolidated the two datasets, removed a few bad samples (although there are probably many more), and resized the images to 300x480 RGB, i.e. to one sixteenth of the original image size. In case you'd like to train a model on the same dataset, you can download the consolidated and resized dataset I used [here](https://drive.google.com/open?id=1uOqIUiJlDwoeL8vnNMacNbkDpDe1eRp-) (about 900 MB)."
]
},
-{
-"cell_type": "code",
-"execution_count": 4,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
-"\n",
-"train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
-"val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
-]
-},
{
"cell_type": "code",
"execution_count": 5,
@@ -255,6 +238,11 @@
}
],
"source": [
+"# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+"\n",
+"train_dataset = DataGenerator()\n",
+"val_dataset = DataGenerator()\n",
+"\n",
"# 2: Parse the image and label lists for the training and validation datasets.\n",
"\n",
"# TODO: Set the paths to your dataset here.\n",
@@ -334,7 +322,6 @@
" variances=variances,\n",
" pos_iou_threshold=0.5,\n",
" neg_iou_limit=0.3,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords)\n",
"\n",
"# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -410,6 +397,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
+"collapsed": true,
"scrolled": true
},
"outputs": [],
@@ -593,8 +581,7 @@
" confidence_thresh=0.5,\n",
" iou_threshold=0.45,\n",
" top_k=200,\n",
-" input_coords='centroids',\n",
-" normalize_coords=True,\n",
+" normalize_coords=normalize_coords,\n",
" img_height=img_height,\n",
" img_width=img_width)\n",
"\n",
4 changes: 1 addition & 3 deletions weight_sampling_tutorial.ipynb
@@ -472,7 +472,6 @@
"offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
"clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries\n",
"variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are scaled as in the original implementation\n",
-"coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
"normalize_coords = True"
]
},
@@ -508,7 +507,6 @@
" offsets=offsets,\n",
" clip_boxes=clip_boxes,\n",
" variances=variances,\n",
-" coords=coords,\n",
" normalize_coords=normalize_coords,\n",
" subtract_mean=subtract_mean,\n",
" divide_by_stddev=None,\n",
@@ -562,7 +560,7 @@
}
],
"source": [
-"dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to your dataset here.\n",
"images_path = '../../datasets/Udacity_Driving/driving_dataset_consolidated_small/'\n",
